# Shell commands (not R): install the r-pecotmr and r-bedmatrix packages
# into the micromamba environment named `r_libs`.
micromamba install -n r_libs r-pecotmr
micromamba install -n r_libs r-bedmatrix

# Load the plotting and utility helper scripts used by the rest of the notebook.
source('/data/interactive_analysis/rf2872/codes/cb_plot.R')
source('/data/interactive_analysis/rf2872/codes/utilis.R')

# Source every R script found in the local colocboost R/ directory.
# `pattern` is a regular expression: the dot is escaped and the match is
# anchored at the end so that only files whose name ends in ".R" are sourced
# (an unescaped ".R" matches any name containing some character followed by "R").
for (file in list.files("/data/colocalization/colocboost/R", pattern = "\\.R$", full.names = TRUE)) {
    source(file)
}
# Gene analysed in this notebook; used below to build output file names.
gene_name <- 'USP6NL'

# Create the per-gene figure directory. The existence check keeps a re-run of
# this cell from emitting an "already exists" warning.
plot_dir <- file.path('plots', gene_name)
if (!dir.exists(plot_dir)) {
    dir.create(plot_dir, recursive = TRUE)
}

# Look up the gene's metadata with the project helper and display it.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Region identifier and chromosome of the target gene, taken from the
# `gene_info` table returned above.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# Re-source the utility helpers (the same script is already sourced near the
# top of the notebook) -- presumably to pick up recent edits; TODO confirm.
source('/data/interactive_analysis/rf2872/codes/utilis.R')
# Call the project helper on the target gene; as a top-level expression its
# visible return value is displayed by the notebook.
expression_in_rosmap_bulk(target_gene_info)

# Display `region_p`. It is not assigned anywhere in the visible code --
# NOTE(review): presumably created by an earlier cell or a helper; verify.
region_p

# Display `pip_p` (same caveat: not assigned in the visible code).
pip_p

# Load the ColocBoost result object for the target gene (file is keyed by gene_id).
cb_res <- readRDS(paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds"))

# Summarise the ColocBoost result into a table and save it next to the
# notebook as <gene_name>_colocboost_res.rds.
cb_res_table <- get_cb_summary(cb_res)
saveRDS(cb_res_table, paste0(gene_name, "_colocboost_res.rds"))

# Draw the ColocBoost plot; the returned object's `p` element is replayed below.
cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Write the plot to plots/<gene_name>/. The directory is derived from
# `gene_name` (instead of a hard-coded gene) so it stays in sync with the
# directory created for this gene.
pdf(file.path('plots', gene_name, 'sec2.colocboost_res.pdf'), width = 10, height = 5)
replayPlot(cb$p)
dev.off()

# Display the ColocBoost summary table (colocalized variants) built by
# get_cb_summary() above.
cb_res_table

# Effect sign for each colocalization set, computed by the project helper
# from the ColocBoost result.
get_effect_sign_csets(cb_res)

# LD between colocalization sets. The helper is given the gene's region id and
# the eQTL data directory -- NOTE(review): presumably it reads genotype/LD data
# from that path; verify against the helper.
get_between_purity_simple(cb_res, gene.name = gene_id, path = '/data/colocalization/QTL_data/eQTL/')

# AD GWAS study names passed to plot_cb() as `cohorts`.
AD_cohorts <- c(
    'AD_Jansen_2021',
    'AD_Bellenguez_EADB_2022',
    'AD_Bellenguez_EADI_2022',
    'AD_Kunkle_Stage1_2019',
    'AD_Wightman_Excluding23andMe_2021',
    'AD_Wightman_ExcludingUKBand23andME_2021',
    'AD_Wightman_Full_2021'
)

# Same ColocBoost plot as before, now with add_gwas = TRUE for the studies above.
cb_ad <- plot_cb(
    cb_res = cb_res,
    cex.pheno = 1.5,
    x.phen = -0.2,
    add_gwas = TRUE,
    gene_id = gene_id,
    cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.Error in fread(paste0("/data/GWAS/ADGWAS_sumstats/", block, ".RSS_QC_RAISS_imputed.",  : 
  File '/data/GWAS/ADGWAS_sumstats/10_8883571-10500888.RSS_QC_RAISS_imputed.AD_Kunkle_Stage1_2019.sumstats.tsv.gz' does not exist or is non-readable. getwd()=='/data/interactive_analysis/lz2838/xqtl-paper/AD_targets/USP6NL'
No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Save the ColocBoost + AD GWAS plot under plots/<gene_name>/. The directory is
# built from `gene_name` rather than a hard-coded gene so the cell can be
# reused for another gene without editing the path.
pdf(file.path('plots', gene_name, 'sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
replayPlot(cb_ad$p)
dev.off()

# TWAS results for the target gene (project helper; output is displayed).
plot_TWAS_res(gene_id = gene_id)

# Flatten the xQTL/GWAS overlap export for this gene. The file name is derived
# from `gene_name` so it follows the gene chosen at the top of the notebook.
multigene_flat <- get_multigene_multicontext_flatten(
    paste0('Fungen_xQTL_allQTL.overlapped.gwas.export.', gene_name, '.rds'),
    sQTL = 'no_MSBB'
)
multigene_flat

# `sliding_windows` is stored as one comma-separated string in the gene info
# table; split it into a character vector with one entry per window.
sliding_windows <- as.character(unlist(strsplit(target_gene_info$gene_info$sliding_windows, ',')))
sliding_windows

# For every sliding window of the target gene, load the ROSMAP multi-gene
# mvSuSiE result (if one exists), plot it when the target gene is among the
# fitted conditions, and collect all loaded results in `mnm_gene`.
mnm_gene <- list()
# Loop-invariant: region id of the target gene, compared against each fit.
target_region_id <- target_gene_info$gene_info$region_id
for (window in sliding_windows) {
    mnm_path <- paste0('/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.', window, '.mnm.rds')

    # A window without a readable result file is skipped (best effort), but the
    # reason is reported instead of being silently discarded.
    mnm_gene_tmp <- tryCatch(
        readRDS(mnm_path),
        error = function(e) {
            message('Could not read mnm result for sliding window ', window, ': ', conditionMessage(e))
            NULL
        }
    )
    if (is.null(mnm_gene_tmp)) {
        next
    }

    if (target_region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
        tryCatch({
            p <- mvsusieR::mvsusie_plot(mnm_gene_tmp$mvsusie_fitted, sentinel_only = FALSE, add_cs = TRUE)
            print(p)  # This ensures the plot is displayed in JupyterLab
        }, error = function(e) {
            # Plotting failures do not stop the loop, but are no longer hidden.
            message('Could not plot mnm result for sliding window ', window, ': ', conditionMessage(e))
        })
    } else {
        message('There is mnm result for sliding window ',window,', but not include target gene ', gene_name, ' in CS')
    }

    # Keep every successfully loaded result, whether or not it was plotted.
    mnm_gene[[length(mnm_gene) + 1]] <- mnm_gene_tmp
}

$pip_plot

$effect_plot

$z_plot
NULL

$effects
                        L2         L1
ENSG00000170525 -0.2136777 0.03393311
ENSG00000048740 -0.2136777 0.03393311
ENSG00000148429 -0.2136777 0.03393311

$pip_plot

$effect_plot

$z_plot
NULL

$effects
                        L1         L2
ENSG00000048740 0.01925579 0.07385331
ENSG00000148429 0.01925579 0.07385331
ENSG00000183049 0.01925579 0.07385331

$pip_plot

$effect_plot

$z_plot
NULL

$effects
                      L1         L3         L2
ENSG00000148429 0.428056 0.04816641 0.05349611
ENSG00000183049 0.428056 0.04816641 0.05349611
ENSG00000065809 0.428056 0.04816641 0.05349611

# Large notebook canvas for the faceted figure below.
options(repr.plot.width = 40, repr.plot.height = 40)

# AD GWAS rows of the flatten table with a non-zero `cs_coverage_0.95`.
# The faceting columns (study, region, cs_coverage_0.95) are dropped from this
# layer's data, so these red points are not tied to a single facet row.
ad_cs_variants <- flatten_table %>%
  filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
  mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
  select(-study, -region, -cs_coverage_0.95)

# Estimated effect curves and PIPs per credible set / study / region, with the
# gene drawn as an arrow at y = 1 and AD GWAS credible-set variants in red.
# NOTE(review): `effect_of_interest`, `flatten_table` and `tar_gene_info` are
# not assigned in the visible code; only `target_gene_info` is -- confirm that
# `tar_gene_info` is defined elsewhere and is not a typo.
ggplot() +
  theme_bw() +
  # `scales` is spelled out in full; the former `scale =` only worked through
  # partial argument matching.
  facet_grid(
    cs_coverage_0.95 + study + region ~ .,
    labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
    scales = "free_y"
  ) +
  # xlim(view_win) +
  #   geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
  #            aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  # aes() replaces the deprecated aes_string(); the mapped columns are unchanged.
  geom_line(data = effect_of_interest,
            aes(y = fun_plot, x = x, col = cs_coverage_0.95), size = 2) +
  geom_point(data = effect_of_interest,
             aes(y = pip, x = pos, col = cs_coverage_0.95), size = 10) +
  geom_segment(arrow = arrow(length = unit(1, "cm")),
               aes(x = gene_start, xend = gene_end, y = 1, yend = 1), size = 6,
               data = tar_gene_info$gene_info, alpha = 0.3) +
  geom_text(aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name), size = 10,
            data = tar_gene_info$gene_info) +
  geom_point(aes(x = pos, y = pip), color = "red", data = ad_cs_variants) +
  # Single theme/label specification: an earlier theme() and ylab() in this
  # chain set the same properties and were overridden by these, so they were
  # dropped without changing the rendered figure.
  theme(text = element_text(size = 40), strip.text.y = element_text(size = 15, angle = 0.5),
        axis.text.x = element_text(size = 40), axis.title.x = element_text(size = 40)) +
  xlab("Position") +
  ylab("Estimated\neffect")

# Fine-mapping contexts saved by section 1, passed through the project helper
# get_norosmap_contexts() (presumably dropping ROSMAP contexts -- verify).
finempping_contexts <- get_norosmap_contexts(
    readRDS(paste0(gene_name, '_finemapping_contexts.rds'))
)

# ColocBoost plot with add_QTL = TRUE for those contexts. This reassigns
# `cb_ad`, replacing the AD GWAS version created earlier in the notebook.
cb_ad <- plot_cb(
    cb_res = cb_res,
    cex.pheno = 1.5,
    x.phen = -0.2,
    add_QTL = TRUE,
    cohorts = finempping_contexts,
    gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width=6, repr.plot.height=6)

# Interaction-association q-value of each variant, on a fixed 0-1 y axis.
# NOTE(review): `USP6NL_int_res` is not assigned in the visible code and its
# name is gene-specific; confirm where it is created.
int_p <- ggplot(USP6NL_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  # Title typo fixed ("analysis") and gene taken from `gene_name`; the x axis
  # shows variant ids, so it is labelled accordingly.
  labs(title = paste0("qvalue for ", gene_name, " csets in interaction association analysis"),
       x = "Variant ID",
       y = "qvalue_interaction",
       size = "qvalue_interaction") +
  theme_minimal(base_size = 14) +
  # NOTE(review): `legend.position = NULL` looks like a no-op; if the intent
  # was to hide the legend, "none" would be needed -- confirm.
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey80"),
        legend.position = NULL,
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 1)
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

# Display the plot in the notebook, then save exactly this plot object
# (explicit `plot =` instead of relying on the last plot created).
int_p
ggsave(file.path('plots', gene_name, paste0('sec11.interaction_association_', gene_name, '_lessPIP25.pdf')),
       plot = int_p, height = 5, width = 8)

# NOTE(review): the bare `FIXME` lines below are unfinished placeholders; as R
# code they would be evaluated as a symbol lookup. `vars_p`, `apoe_p` and
# `func_p` are displayed here but are not assigned in the visible code --
# presumably created elsewhere; verify.
FIXME

vars_p

apoe_p

FIXME

func_p

options(repr.plot.width=12, repr.plot.height=6)

# PIP of each trans fine-mapped gene, drawn only when there is something to plot.
# NOTE(review): `flat_var` is not assigned in the visible code; confirm its source.
# An empty table is treated like a missing one so no blank figure is written.
if (!is.null(flat_var) && NROW(flat_var) > 0) {
    trans_p <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
      geom_point(alpha = 0.7) +
      labs(title = paste0("PIP values for trans fine mapped Genes in ", gene_name ," csets with AD"),
           x = "Gene Name",
           y = "PIP",
           size = "PIP",
           color = "CS Coverage 0.95 Min Corr") +
      theme_minimal(base_size = 14) +
      theme(panel.background = element_blank(),
            panel.grid.major = element_line(color = "grey80"),
            legend.position = NULL,
            axis.text.x = element_text(angle = 45, hjust = 1))
      # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

    # A plot built inside `if { }` is not auto-displayed, so print it
    # explicitly (as done for the mvSuSiE plots earlier in the notebook).
    print(trans_p)

    # Save this exact plot; the directory follows `gene_name` like the file name.
    ggsave(file.path('plots', gene_name, paste0('sec12.trans_fine_mapping_', gene_name, '.pdf')),
           plot = trans_p, height = 5, width = 8)
} else {
    message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000148429	chr10	9320000	12611754	11611753	chr10:8883571-10500888,chr10:10500888-12817813	10_8883571-10500888,10_10500888-12817813	10_8883571-10500888,10_10500888_12817813	TADB_815,TADB_816	chr10_8721963_13123221,chr10_10823338_14348298	11611754	11453946	chr10:2066332-10309772,chr10:4638867-13123221,chr10:6486703-14348298,chr10:8721963-16371289,chr10:10823338-18911164,chr10:11896625-19681639	USP6NL

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
Exc; Inh; DLPFC; AC	1.0000000	1	0.9827712	11316	chr10:11611767:T:C	chr10:11611767:T:C	coloc_sets:Y5_Y6_Y7_Y8:CS2
Oli; Inh	0.9859219	19	0.2335635	11041; 11016; 11017; 10893; 10933; 11317; 10915; 10922; 10987; 11221; 11293; 11299; 10743; 11236; 10938; 10792; 10795; 10835; 10884	chr10:11537547:G:A; chr10:11530303:C:T; chr10:11530320:G:A; chr10:11501662:G:A; chr10:11510638:A:AGT; chr10:11611773:C:CGCGGCCCCGG; chr10:11506734:ATTT:AT; chr10:11508164:TC:T; chr10:11522422:C:T; chr10:11580291:T:C; chr10:11602109:C:A; chr10:11605066:A:T; chr10:11471201:C:A; chr10:11583176:T:C; chr10:11511828:TACACACAC:TACACAC; chr10:11481311:T:TG; chr10:11482027:G:A; chr10:11487869:G:A; chr10:11498764:G:C	chr10:11611773:C:CGCGGCCCCGG	coloc_sets:Y3_Y6:CS3
AC_unproductive; PCC_unproductive	0.9847566	3	0.5272430	11366; 11159; 11331	chr10:11617147:A:G; chr10:11562400:T:A; chr10:11614601:T:C	chr10:11614601:T:C	coloc_sets:Y13_Y17:CS1
Mic; AC	0.9246054	8	0.8486096	11785; 11788; 11742; 11704; 11746; 11768; 11773; 11778	chr10:11678309:A:G; chr10:11678621:C:T; chr10:11665916:A:G; chr10:11672687:C:T; chr10:11672508:C:T; chr10:11675398:T:C; chr10:11676332:T:C; chr10:11677075:T:G	chr10:11678309:A:G	coloc_sets:Y1_Y8:MergeCS1

	variants	Exc	Inh	DLPFC	AC
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>
chr10:11611767:T:C	chr10:11611767:T:C	-5.049365	-4.113005	-6.217721	-7.393575

	variants	Oli	Inh
	<chr>	<dbl>	<dbl>
chr10:11537547:G:A	chr10:11537547:G:A	11.59249	9.055687
chr10:11530303:C:T	chr10:11530303:C:T	11.70560	8.831806
chr10:11530320:G:A	chr10:11530320:G:A	11.70560	8.831806
chr10:11501662:G:A	chr10:11501662:G:A	11.63148	8.853603
chr10:11510638:A:AGT	chr10:11510638:A:AGT	11.63148	8.853603
chr10:11611773:C:CGCGGCCCCGG	chr10:11611773:C:CGCGGCCCCGG	11.72203	8.694048
chr10:11506734:ATTT:AT	chr10:11506734:ATTT:AT	11.65693	8.755211
chr10:11508164:TC:T	chr10:11508164:TC:T	11.65693	8.755211
chr10:11522422:C:T	chr10:11522422:C:T	11.65693	8.755211
chr10:11580291:T:C	chr10:11580291:T:C	11.65693	8.755211
chr10:11602109:C:A	chr10:11602109:C:A	11.65693	8.755211
chr10:11605066:A:T	chr10:11605066:A:T	11.65693	8.755211
chr10:11471201:C:A	chr10:11471201:C:A	11.70129	8.672623
chr10:11583176:T:C	chr10:11583176:T:C	11.63491	8.685068
chr10:11511828:TACACACAC:TACACAC	chr10:11511828:TACACACAC:TACACAC	11.63002	8.654840
chr10:11481311:T:TG	chr10:11481311:T:TG	11.43298	8.707366
chr10:11482027:G:A	chr10:11482027:G:A	11.43298	8.707366
chr10:11487869:G:A	chr10:11487869:G:A	11.43298	8.707366
chr10:11498764:G:C	chr10:11498764:G:C	11.43298	8.707366

	variants	AC_unproductive	PCC_unproductive
	<chr>	<dbl>	<dbl>
chr10:11617147:A:G	chr10:11617147:A:G	16.11899	12.44555
chr10:11562400:T:A	chr10:11562400:T:A	15.86990	12.57386
chr10:11614601:T:C	chr10:11614601:T:C	16.11930	12.28894

Case study: USP6NL xQTL and AD GWAS¶

Overview¶

Computing environment setup¶

How to Use This Notebook¶

Section 0: Sanity check ¶

Check the basic information of the gene¶

Check the existing results which are inputs to this analysis¶

Section 1: Fine-mapping for xQTL and GWAS ¶

Section 2: Multi-context colocalization with Bellenguez 2022 ¶

Section 3: Refinement of colocalized loci with other AD GWAS ¶

Section 4: Assessment of multi-context xQTL effect sizes ¶

Option 1: ColocBoost + MASH¶

Option 2: mvSuSiE¶

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)¶

TWAS results¶

MR results¶

cTWAS results¶

Section 6: Context specific multi-gene fine-mapping ¶

A quick analysis: using the xQTL-AD summary table (flatten table)¶

A statistically solid approach: mvSuSiE multi-gene analysis¶

Section 7: Epigenomic QTL and their target regions ¶

Generate a crude plot to determine whether the story is interesting¶

Section 8: Context focused validation in other xQTL data ¶

Section 9: Non-linear effects of xQTL ¶

APOE interaction¶

Section 10: in silico functional studies in iPSC model ¶

Section 11: Functional annotations of selected loci ¶

Section 12: Candidate loci as trans-xQTL ¶

Creative thinking: generate hypothesis, search in literature, raise questions to discuss¶

coloc_csets_1	coloc_csets_2	min_abs_cor	max_abs_cor	median_abs_cor
coloc_sets:Y5_Y6_Y7_Y8:CS2	coloc_sets:Y3_Y6:CS3	0.0666694235794254	0.0688450776323751	0.0681293658728584
coloc_sets:Y5_Y6_Y7_Y8:CS2	coloc_sets:Y13_Y17:CS1	0.0710242116464201	0.0727572349659237	0.0720665838541517
coloc_sets:Y5_Y6_Y7_Y8:CS2	coloc_sets:Y1_Y8:MergeCS1	0.0160672087438717	0.0352048491817816	0.0271634283965724
coloc_sets:Y3_Y6:CS3	coloc_sets:Y13_Y17:CS1	0.295982543603216	0.308229503594267	0.302733297782435
coloc_sets:Y3_Y6:CS3	coloc_sets:Y1_Y8:MergeCS1	0.0570612580141974	0.107026837211434	0.0818219088631476
coloc_sets:Y13_Y17:CS1	coloc_sets:Y1_Y8:MergeCS1	0.239274425589726	0.300346221885692	0.28420718660991

A data.frame: 8 x 3
	variants	Mic	AC
	<chr>	<dbl>	<dbl>
chr10:11678309:A:G	chr10:11678309:A:G	14.65267	7.101422
chr10:11678621:C:T	chr10:11678621:C:T	14.25159	7.176121
chr10:11672508:C:T	chr10:11672508:C:T	12.50684	7.142168
chr10:11665916:A:G	chr10:11665916:A:G	12.26185	7.251752
chr10:11672687:C:T	chr10:11672687:C:T	12.40079	7.217319
chr10:11675398:T:C	chr10:11675398:T:C	12.71613	7.070278
chr10:11676332:T:C	chr10:11676332:T:C	12.71613	7.034674
chr10:11677075:T:G	chr10:11677075:T:G	12.69696	6.930016

A data.frame: 10 x 6
gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000048740	chr10	10798396	10798397	CELF2	MiGA_GTS_eQTL,BM_36_MSBB_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000065665	chr10	12129636	12129637	SEC61A2	MiGA_THA_eQTL,BM_10_MSBB_eQTL,BM_36_MSBB_eQTL,Oli_DeJager_eQTL,Exc_DeJager_eQTL,DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL,Exc_Kellis_eQTL,Exc_mega_eQTL,Oli_mega_eQTL,DLPFC_Bennett_pQTL,ROSMAP_DLPFC_sQTL
ENSG00000086475	chr10	13348297	13348298	SEPHS1	ROSMAP_DLPFC_sQTL
ENSG00000123240	chr10	13099448	13099449	OPTN	MSBB_BM36_pQTL
ENSG00000134463	chr10	11742365	11742366	ECHDC3	Knight_eQTL,MiGA_GTS_eQTL,BM_10_MSBB_eQTL,BM_22_MSBB_eQTL,BM_44_MSBB_eQTL,MSBB_BM36_pQTL,DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000148426	chr10	11823338	11823339	PROSER2	STARNET_eQTL
ENSG00000151461	chr10	12043169	12043170	UPF2	MiGA_GTS_eQTL
ENSG00000165609	chr10	12196143	12196144	NUDT5	Exc_mega_eQTL,Oli_mega_eQTL,ROSMAP_PCC_sQTL
ENSG00000181192	chr10	12068953	12068954	DHTKD1	BM_10_MSBB_eQTL,Inh_DeJager_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000183049	chr10	12349546	12349547	CAMK1D	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL,STARNET_eQTL