# Install the r-pecotmr and r-bedmatrix packages into the `r_libs` micromamba environment.
# NOTE(review): these are shell commands, not R statements — run them in a
# terminal or shell cell before starting the R session; presumably the analysis
# below needs both packages, confirm against the sourced helper scripts.
micromamba install -n r_libs r-pecotmr
micromamba install -n r_libs r-bedmatrix

# Load the plotting and utility helper scripts used by the rest of this analysis.
source('/data/interactive_analysis/rf2872/codes/cb_plot.R')
source('/data/interactive_analysis/rf2872/codes/utilis.R')

# Source every R script found in the local colocboost checkout.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored to the end of the file name. The unescaped ".R" matched any name
# containing an "R" preceded by an arbitrary character (e.g. "notes.Rmd").
for (file in list.files("/data/colocalization/colocboost/R", pattern = "\\.R$", full.names = TRUE)) {
  source(file)
}
# Gene analysed in this case study; later steps derive paths and IDs from it.
gene_name <- 'BLNK'

# Create the per-gene output directory for figures.
# showWarnings = FALSE keeps re-runs quiet when the directory already exists.
dir.create(paste0('plots/', gene_name), recursive = TRUE, showWarnings = FALSE)

# Look up the gene annotation record and display it.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Region ID and chromosome taken from the annotation record.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# Re-source the utility script (already loaded above), so edits made to it
# since the first load are picked up before the next call.
source('/data/interactive_analysis/rf2872/codes/utilis.R')
expression_in_rosmap_bulk(target_gene_info)

# NOTE(review): `region_p` and `pip_p` are not defined in the visible part of
# this file — presumably plot objects created by earlier code; confirm.
region_p

pip_p

# Load the ColocBoost result object for the target gene.
cb_res <- readRDS(paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds"))

# Summarise the ColocBoost results and save the summary table for later use.
cb_res_table <- get_cb_summary(cb_res)

saveRDS(cb_res_table, paste0(gene_name, "_colocboost_res.rds"))

cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Write the recorded plot to the per-gene plots directory. The path is built
# from `gene_name` so it always matches the directory created for that gene
# instead of a hard-coded "BLNK".
pdf(file.path('plots', gene_name, 'sec2.colocboost_res.pdf'), width = 10, height = 5)
replayPlot(cb$p)
dev.off()

# colocalized variants
cb_res_table

# effect sign for each coloc sets
get_effect_sign_csets(cb_res)

# LD between coloc sets
get_between_purity_simple(cb_res, gene.name = gene_id, path = '/data/colocalization/QTL_data/eQTL/')

# AD GWAS studies to overlay on the ColocBoost plot.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)

# Redraw the ColocBoost plot with the GWAS cohorts listed above added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  gene_id = gene_id,
  cohorts = AD_cohorts,
  add_gwas = TRUE,
  cex.pheno = 1.5,
  x.phen = -0.2
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.Error : File '/data/GWAS/ADGWAS_sumstats/10_95280986-98320874.RSS_QC_RAISS_imputed.AD_Kunkle_Stage1_2019.sumstats.tsv.gz' does not exist or is non-readable. getwd()=='/data/interactive_analysis/lz2838/xqtl-paper/AD_targets/BLNK'
No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Save the ColocBoost + AD GWAS plot. The path is derived from `gene_name` so
# it stays consistent with the per-gene plots directory created earlier.
pdf(file.path('plots', gene_name, 'sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
replayPlot(cb_ad$p)
dev.off()

# TWAS results for the target gene.
plot_TWAS_res(gene_id = gene_id)

# Flattened multi-gene / multi-context table built from the per-gene export
# file; the file name is derived from `gene_name` rather than hard-coded.
multigene_flat <- get_multigene_multicontext_flatten(
  paste0('Fungen_xQTL_allQTL.overlapped.gwas.export.', gene_name, '.rds'),
  sQTL = 'no_MSBB'
)
multigene_flat

# Sliding windows for the target gene: a comma-separated string in the
# annotation record, split into a character vector.
sliding_windows <- target_gene_info$gene_info$sliding_windows %>% strsplit(., ',') %>% unlist %>% as.character
sliding_windows

# Collect the multi-gene mvSuSiE result of every window that has one, plotting
# the fit whenever the target gene is among its conditions.
mnm_gene <- list()
for (window in sliding_windows) {
  # A window without a readable result file is skipped, but the reason is
  # reported instead of being swallowed silently.
  mnm_gene_tmp <- tryCatch(
    readRDS(paste0('/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.', window, '.mnm.rds')),
    error = function(e) {
      message('Could not read mnm result for sliding window ', window, ': ', conditionMessage(e))
      NULL
    }
  )
  if (is.null(mnm_gene_tmp)) {
    next
  }

  if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
    tryCatch({
      p <- mvsusieR::mvsusie_plot(mnm_gene_tmp$mvsusie_fitted, sentinel_only = FALSE, add_cs = TRUE)
      print(p)  # This ensures the plot is displayed in JupyterLab
    }, error = function(e) {
      # Plotting is best-effort: report the failure and keep the result.
      message('mvsusie_plot failed for sliding window ', window, ': ', conditionMessage(e))
    })
  } else {
    message('There is mnm result for sliding window ',window,', but not include target gene ', gene_name, ' in CS')
  }
  mnm_gene[[length(mnm_gene) + 1]] <- mnm_gene_tmp
}

$pip_plot

$effect_plot

$z_plot
NULL

$effects
                           L2            L1
ENSG00000165338 -5.436711e-08 -1.223854e-05
ENSG00000148690 -5.436711e-08 -1.223854e-05
ENSG00000173145 -5.436711e-08 -1.223854e-05
ENSG00000108239 -5.436711e-08 -1.223854e-05
ENSG00000188649 -5.436711e-08 -1.223854e-05
ENSG00000095585 -5.436711e-08 -1.223854e-05

$pip_plot

$effect_plot

$z_plot
NULL

$effects
                       L4           L2         L3      L1
ENSG00000173145 0.4128248 3.291789e-06 0.02611942 0.02831
ENSG00000108239 0.4128248 3.291789e-06 0.02611942 0.02831
ENSG00000188649 0.4128248 3.291789e-06 0.02611942 0.02831
ENSG00000095585 0.4128248 3.291789e-06 0.02611942 0.02831
ENSG00000166024 0.4128248 3.291789e-06 0.02611942 0.02831

$pip_plot

$effect_plot

$z_plot
NULL

$effects
                         L2         L1
ENSG00000188649 0.004220688 0.00280786
ENSG00000095585 0.004220688 0.00280786
ENSG00000166024 0.004220688 0.00280786

# Large canvas for the multi-facet figure.
options(repr.plot.width = 40, repr.plot.height = 40)

# Effect curves and PIP points, one facet row per credible set / study / region,
# with the target gene drawn as an arrow and AD GWAS credible-set variants in red.
# Changes from the previous version:
#   * `scales =` is spelled out (it was `scale =`, relying on partial matching);
#   * deprecated aes_string() calls are replaced by aes() with bare column names;
#   * an earlier ylab() and theme() that were fully overridden further down the
#     chain are dropped (the rendered plot is unchanged);
#   * the gene annotation comes from `target_gene_info`, the object defined
#     earlier in this file.
# NOTE(review): the original read `tar_gene_info`, which is not defined in the
# visible code — confirm it was meant to be `target_gene_info`.
ggplot() +
  theme_bw() +
  facet_grid(
    cs_coverage_0.95 + study + region ~ .,
    labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
    scales = "free_y"
  ) +
  # xlim(view_win) +
  # geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
  #           aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  geom_line(
    data = effect_of_interest,
    aes(y = fun_plot, x = x, col = cs_coverage_0.95),
    size = 2
  ) +
  geom_point(
    data = effect_of_interest,
    aes(y = pip, x = pos, col = cs_coverage_0.95),
    size = 10
  ) +
  theme(
    text = element_text(size = 40),
    strip.text.y = element_text(size = 15, angle = 0.5),
    axis.text.x = element_text(size = 40),
    axis.title.x = element_text(size = 40)
  ) +
  xlab("Position") +
  ylab("Estimated\neffect") +
  # Gene body as a semi-transparent arrow from gene_start to gene_end.
  geom_segment(
    aes(x = gene_start, xend = gene_end, y = 1, yend = 1),
    data = target_gene_info$gene_info,
    arrow = arrow(length = unit(1, "cm")),
    size = 6,
    alpha = 0.3
  ) +
  geom_text(
    aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name),
    data = target_gene_info$gene_info,
    size = 10
  ) +
  # AD GWAS variants that fall in a credible set; the faceting columns are
  # removed from this layer's data so the points are drawn in every facet.
  geom_point(
    aes(x = pos, y = pip),
    color = "red",
    data = flatten_table %>%
      filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
      mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
      select(-study, -region, -cs_coverage_0.95)
  )

# Reload the fine-mapping contexts saved in section 1 and pass them through
# get_norosmap_contexts() in a single step.
finempping_contexts <- get_norosmap_contexts(
  readRDS(paste0(gene_name, '_finemapping_contexts.rds')) # from sec1
)

# Redraw the ColocBoost plot with those QTL contexts added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  gene_id = gene_id,
  cohorts = finempping_contexts,
  add_QTL = TRUE,
  cex.pheno = 1.5,
  x.phen = -0.2
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# Interaction q-value of each variant in the BLNK colocalization sets.
# Changes from the previous version: the title typo is fixed, the x axis label
# names what is actually plotted (variant_id), the label for an unmapped `size`
# aesthetic and the no-op `legend.position = NULL` are dropped, and the plot
# object is passed to ggsave() explicitly instead of relying on last_plot().
interaction_plot <- ggplot(BLNK_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(title = "qvalue for BLNK csets in interaction association analysis",
       x = "Variant ID",
       y = "qvalue_interaction") +
  theme_minimal(base_size = 14) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey80"),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 1)
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))
print(interaction_plot)
ggsave('plots/BLNK/sec11.interaction_association_BLNK_lessPIP25.pdf', plot = interaction_plot, height = 5, width = 8)

# Display three objects by evaluating them at top level.
# NOTE(review): `vars_p`, `apoe_p` and `func_p` are not defined in the visible
# part of this file — presumably plot objects built by earlier code; confirm
# they exist before these lines run.
vars_p

apoe_p

func_p

options(repr.plot.width = 12, repr.plot.height = 6)

# PIP of trans fine-mapped genes, drawn only when `flat_var` holds results.
# The plot is now printed explicitly: inside the `if` block it was never
# auto-printed, so nothing was displayed. It is also passed to ggsave()
# explicitly, and the output directory is derived from `gene_name` to match
# the file name. The label for an unmapped `color` aesthetic and the no-op
# `legend.position = NULL` are dropped.
if (!is.null(flat_var)) {
  trans_plot <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
    geom_point(alpha = 0.7) +
    labs(title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
         x = "Gene Name",
         y = "PIP",
         size = "PIP") +
    theme_minimal(base_size = 14) +
    theme(panel.background = element_blank(),
          panel.grid.major = element_line(color = "grey80"),
          axis.text.x = element_text(angle = 45, hjust = 1))
    # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))
  print(trans_plot)
  ggsave(
    file.path('plots', gene_name, paste0('sec12.trans_fine_mapping_', gene_name, '.pdf')),
    plot = trans_plot, height = 5, width = 8
  )
} else {
  message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000095585	chr10	94520000	97760000	96271586	chr10:93602293-95280986,chr10:95280986-98320874	10_93602293-95280986,10_95280986-98320874	10_93602293-95280986,10_95280986_98320874	TADB_854,TADB_855	chr10_91451684_96291012,chr10_93402540_98760907	96271587	96189171	chr10:83138420-94806272,chr10:85935540-96291012,chr10:89740823-98760907,chr10:91451684-101286680,chr10:93402540-103120368,chr10:96435909-104855543	BLNK

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
Mic; DLPFC; AC; PCC; AD_Bellenguez_2022	0.9867176	15	0.1124434	8364; 8324; 8293; 8291; 8323; 8295; 8315; 8321; 8286; 8282; 8259; 8247; 8277; 8214; 8217	chr10:96281986:CA:C; chr10:96266650:G:A; chr10:96259400:A:G; chr10:96259356:A:G; chr10:96266554:A:G; chr10:96259827:T:C; chr10:96265101:G:A; chr10:96266318:A:G; chr10:96258109:A:G; chr10:96255961:T:C; chr10:96253690:T:A; chr10:96251179:T:A; chr10:96255397:A:T; chr10:96244776:C:T; chr10:96245257:A:C	chr10:96281986:CA:C	coloc_sets:Y1_Y2_Y3_Y4_Y12:CS2
AC_unproductive; PCC_unproductive	0.9244346	2	0.8547135	7681; 7730	chr10:96142826:G:A; chr10:96153763:G:A	chr10:96142826:G:A	coloc_sets:Y7_Y11:CS1

	variants	Mic	DLPFC	AC	PCC	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
chr10:96281986:CA:C	chr10:96281986:CA:C	-8.874665	-8.030545	-8.430595	-8.247098	-5.238739
chr10:96266650:G:A	chr10:96266650:G:A	8.654664	8.034742	8.644846	8.247098	5.224299
chr10:96259400:A:G	chr10:96259400:A:G	8.654664	8.034742	8.644846	8.247098	5.210280
chr10:96259356:A:G	chr10:96259356:A:G	8.654664	8.034742	8.644846	8.247098	5.210280
chr10:96266554:A:G	chr10:96266554:A:G	8.654664	8.034742	8.644846	8.247098	5.205607
chr10:96259827:T:C	chr10:96259827:T:C	8.654664	8.034742	8.644846	8.247098	5.200935
chr10:96265101:G:A	chr10:96265101:G:A	-8.654664	-8.034742	-8.644846	-8.247098	-5.186047
chr10:96266318:A:G	chr10:96266318:A:G	8.654664	8.034742	8.644846	8.247098	5.182243
chr10:96258109:A:G	chr10:96258109:A:G	-8.654664	-8.034742	-8.644846	-8.247098	-5.162791
chr10:96255961:T:C	chr10:96255961:T:C	-8.713714	-8.028471	-8.348343	-8.311296	-5.148837
chr10:96253690:T:A	chr10:96253690:T:A	-8.713714	-8.028471	-8.348343	-8.311296	-5.139535
chr10:96251179:T:A	chr10:96251179:T:A	-8.713714	-8.028471	-8.348343	-8.311296	-5.097222
chr10:96255397:A:T	chr10:96255397:A:T	-8.654664	-8.028471	-8.348343	-8.311296	-5.162791
chr10:96244776:C:T	chr10:96244776:C:T	8.654664	8.028471	8.348343	8.311296	5.100917
chr10:96245257:A:C	chr10:96245257:A:C	-8.654664	-8.028471	-8.348343	-8.311296	-4.954545

	variants	AC_unproductive	PCC_unproductive
	<chr>	<dbl>	<dbl>
chr10:96142826:G:A	chr10:96142826:G:A	16.83003	14.78811
chr10:96153763:G:A	chr10:96153763:G:A	16.18125	13.06718

gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000052749	chr10	97426075	97426076	RRP12	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000059573	chr10	95656710	95656711	ALDH18A1	ROSMAP_DLPFC_sQTL
ENSG00000077147	chr10	96587451	96587452	TM9SF3	ROSMAP_AC_sQTL
ENSG00000095587	chr10	96513925	96513926	TLL2	Oli_DeJager_eQTL,Inh_Kellis_eQTL,Inh_mega_eQTL,ROSMAP_PCC_sQTL
ENSG00000095637	chr10	95561413	95561414	SORBS1	Exc_mega_eQTL,DLPFC_Bennett_pQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000108231	chr10	93757839	93757840	LGI1	MiGA_GTS_eQTL,DLPFC_Klein_gpQTL
ENSG00000108239	chr10	94402540	94402541	TBC1D12	MiGA_THA_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000119943	chr10	98415181	98415182	PYROXD2	ROSMAP_DLPFC_sQTL
ENSG00000119969	chr10	94501433	94501434	HELLS	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000119977	chr10	95694142	95694143	TCTN3	Exc_DeJager_eQTL,Exc_Kellis_eQTL,Exc_mega_eQTL,Inh_mega_eQTL
ENSG00000119986	chr10	97687240	97687241	AVPI1	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL,STARNET_eQTL
ENSG00000138119	chr10	93482333	93482334	MYOF	ROSMAP_PCC_sQTL
ENSG00000138160	chr10	92574104	92574105	KIF11	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000138185	chr10	95711778	95711779	ENTPD1	MiGA_SVZ_eQTL,AC_DeJager_eQTL
ENSG00000138193	chr10	93993930	93993931	PLCE1	ROSMAP_DLPFC_sQTL
ENSG00000155229	chr10	97498793	97498794	MMS19	Inh_mega_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000155252	chr10	97640685	97640686	PI4K2A	MiGA_SVZ_eQTL
ENSG00000155254	chr10	97713172	97713173	MARVELD1	MiGA_GTS_eQTL
ENSG00000155256	chr10	97737120	97737121	ZFYVE27	ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000165879	chr10	97319270	97319271	FRAT1	MiGA_THA_eQTL
ENSG00000165886	chr10	97498923	97498924	UBTD1	ROSMAP_DLPFC_sQTL
ENSG00000171160	chr10	97633499	97633500	MORN4	MiGA_SVZ_eQTL,BM_10_MSBB_eQTL,BM_36_MSBB_eQTL,BM_44_MSBB_eQTL,PCC_DeJager_eQTL
ENSG00000171307	chr10	97446169	97446170	ZDHHC16	MiGA_THA_eQTL
ENSG00000171311	chr10	97446016	97446017	EXOSC1	ROSMAP_PCC_sQTL
ENSG00000171314	chr10	97426190	97426191	PGAM1	MiGA_SVZ_eQTL
ENSG00000172987	chr10	99235861	99235862	HPSE2	ROSMAP_DLPFC_sQTL
ENSG00000173124	chr10	95194199	95194200	ACSM6	MiGA_GFM_eQTL
ENSG00000177853	chr10	96129714	96129715	ZNF518A	MiGA_GTS_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000181274	chr10	97334728	97334729	FRAT2	MiGA_THA_eQTL
ENSG00000187122	chr10	97185958	97185959	SLIT1	MiGA_GFM_eQTL,MiGA_THA_eQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000188649	chr10	95907602	95907603	CC2D2B	MiGA_SVZ_eQTL,AC_DeJager_eQTL
ENSG00000196233	chr10	96832253	96832254	LCOR	BM_36_MSBB_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000197430	chr10	96359364	96359365	OPALIN	MiGA_SVZ_eQTL,MiGA_THA_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000269891	chr10	97292636	97292637	ARHGAP19-SLIT1	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000276490	chr10	94688153	94688154	AL583836.1	ROSMAP_AC_sQTL
ENSG00000285932	chr10	99732111	99732112	AL133353.2	ROSMAP_DLPFC_sQTL

Case study: BLNK xQTL and AD GWAS

Overview

Computing environment setup

How to Use This Notebook

Section 0: Sanity check

Check the basic information of the gene

Check the existing results which are inputs to this analysis

Section 1: Fine-mapping for xQTL and GWAS

Section 2: Multi-context colocalization with Bellenguez 2022

Section 3: Refinement of colocalized loci with other AD GWAS

Section 4: Assessment of multi-context xQTL effect sizes

Option 1: ColocBoost + MASH

Option 2: mvSuSiE

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)

TWAS results

MR results

cTWAS results

Section 6: Context specific multi-gene fine-mapping

A quick analysis: using the xQTL-AD summary table (flatten table)

A statistically solid approach: mvSuSiE multi-gene analysis

Section 7: Epigenomic QTL and their target regions

Generate a crude plot to determine whether the story is interesting

Section 8: Context focused validation in other xQTL data

Section 9: Non-linear effects of xQTL

APOE interaction

Section 10: in silico functional studies in iPSC model

Section 11: Functional annotations of selected loci

Section 12: Candidate loci as trans-xQTL

Creative thinking: generate hypotheses, search the literature, raise questions to discuss