micromamba install -n r_libs r-pecotmr

# If sourcing the scripts below fails, a get() call may have returned NULL.
# Restart the kernel (or click the R kernel indicator in the upper right corner) to resolve the issue.
# Load the project's plotting and utility helpers (paths are relative to the
# notebook's working directory).
source("../../codes/cb_plot.R")
source("../../codes/utilis.R")

# Source every ColocBoost R script from the local checkout.
# `pattern` is a regular expression, not a glob: an unanchored ".R" matches any
# character followed by "R" anywhere in the file name (e.g. "foo.Rmd",
# "foo.Rds"), so the dot is escaped and the match anchored to the extension.
colocboost_scripts <- list.files(
  "/data/colocalization/colocboost/R",
  pattern = "\\.R$",
  full.names = TRUE
)
for (file in colocboost_scripts) {
  source(file)
}
# Gene analysed throughout this notebook.
gene_name <- "WDR12"

# Per-gene output directory for figures. Guarding with dir.exists() avoids the
# "already exists" warning on re-runs without hiding genuine creation failures.
plot_dir <- paste0("plots/", gene_name)
if (!dir.exists(plot_dir)) {
  dir.create(plot_dir, recursive = TRUE)
}

# Look up the gene's metadata once and display it.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Identifiers taken from the gene_info table and reused by later cells.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# Re-source the utility helpers, then call expression_in_rosmap_bulk() on the
# gene metadata object.
source('../../codes/utilis.R')
expression_in_rosmap_bulk(target_gene_info)

# Display region_p.
# NOTE(review): region_p is not assigned anywhere in the visible code --
# presumably produced by an earlier section or a sourced helper; confirm.
region_p

# Display pip_p.
# NOTE(review): pip_p is likewise not assigned in the visible code; confirm its origin.
pip_p

# Load the precomputed ColocBoost result for this gene (a single read is
# sufficient; the result file is keyed by gene_id).
cb_res <- readRDS(
  paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds")
)

# plot_cb() returns an object whose `p` element is replayed below.
cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Save the plot under the per-gene directory. The path is derived from
# gene_name so it stays in sync with the directory created for this gene
# instead of being hard-coded to one gene.
pdf(
  paste0("plots/", gene_name, "/sec2.colocboost_res.pdf"),
  width = 10, height = 5
)
replayPlot(cb$p)
dev.off()

# Display the table of colocalized variants.
# NOTE(review): cb_res_table is not assigned in the visible code -- presumably
# created by an earlier cell or a sourced helper; confirm before running alone.
cb_res_table

# Effect sign for each colocalization set, computed from the ColocBoost result.
get_effect_sign_csets(cb_res)

# LD between colocalization sets; `path` points at the eQTL data directory
# passed to the helper.
get_between_purity_simple(cb_res, gene.name = gene_id, path = '/data/colocalization/QTL_data/eQTL/')

# AD GWAS studies handed to plot_cb() through its `cohorts` argument.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)

# Same ColocBoost plot as before, now with add_gwas = TRUE and the cohort list.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Save the AD-GWAS ColocBoost plot under the per-gene directory. The path is
# derived from gene_name rather than hard-coded to a single gene.
pdf(
  paste0("plots/", gene_name, "/sec3.colocboost_res_allad.pdf"),
  width = 10, height = 5
)
replayPlot(cb_ad$p)
dev.off()

# Build the MASH plots for the gene under study. gene_name is passed through
# instead of repeating the literal, so changing the gene at the top of the
# notebook is sufficient.
mash_p <- mash_plot(gene_name = gene_name)

options(repr.plot.width = 10, repr.plot.height = 10)

# Print each element of mash_p explicitly: values are not auto-displayed
# inside a for loop.
for (mash_p_tmp in mash_p) {
  print(mash_p_tmp)
}

# TWAS results for the same gene.
plot_TWAS_res(gene_id = gene_id, gene_name = gene_name)

# Load the flattened multi-gene / multi-context table from the exported RDS
# file and display it.
multigene_flat <- get_multigene_multicontext_flatten(
  'Fungen_xQTL_allQTL.overlapped.gwas.export.WDR12.rds',
  sQTL = 'no_MSBB'
)
multigene_flat

# Split the comma-separated sliding_windows field into a character vector of
# window identifiers and display it.
sliding_windows <- as.character(
  unlist(
    strsplit(target_gene_info$gene_info$sliding_windows, ',')
  )
)
sliding_windows

# Collect the multi-gene mvSuSiE result of every sliding window that has a
# readable result file, plotting those that include the target gene.
# The loop is best-effort: unreadable windows and failed plots are skipped, but
# the reason is now reported instead of being discarded silently.
mnm_gene <- list()
for (window in sliding_windows) {
  mnm_path <- paste0(
    '/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.',
    window, '.mnm.rds'
  )

  # NULL marks a window whose result could not be read.
  mnm_gene_tmp <- tryCatch(
    readRDS(mnm_path),
    error = function(e) {
      message('Skipping sliding window ', window, ': ', conditionMessage(e))
      NULL
    }
  )
  if (is.null(mnm_gene_tmp)) {
    next
  }

  if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
    tryCatch({
      p <- mvsusieR::mvsusie_plot(mnm_gene_tmp$mvsusie_fitted, sentinel_only = FALSE, add_cs = TRUE)
      print(p)  # This ensures the plot is displayed in JupyterLab
    }, error = function(e) {
      message('Could not plot mvSuSiE result for sliding window ', window, ': ', conditionMessage(e))
      NULL
    })
  } else {
    message('There is mnm result for sliding window ',window,', but not include target gene ', gene_name, ' in CS')
  }

  # Keep every successfully read result, whether or not it was plotted.
  mnm_gene <- append(mnm_gene, list(mnm_gene_tmp))
}

# Large canvas for the faceted effect plot below.
options(repr.plot.width = 40, repr.plot.height = 40)

# Estimated effects and PIPs, one facet row per cs_coverage_0.95 / study /
# region combination, with the gene body drawn as an arrow at y = 1.
# NOTE(review): effect_of_interest, flatten_table and tar_gene_info are not
# assigned in the visible code. tar_gene_info in particular may be intended to
# be target_gene_info (whose gene_info table is used elsewhere) -- confirm.
ggplot() +
  theme_bw() +
  facet_grid(
    cs_coverage_0.95 + study + region ~ .,
    # Break long facet labels at separators so they wrap onto several lines.
    labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
    # Spelled out in full; the argument is `scales`, and `scale` only worked
    # through partial argument matching.
    scales = "free_y"
  ) +
  # xlim(view_win) +
  #   geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
  #            aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  geom_line(
    data = effect_of_interest,
    aes_string(y = "fun_plot", x = "x", col = "cs_coverage_0.95"), size = 2
  ) +
  geom_point(
    data = effect_of_interest,
    aes_string(y = "pip", x = "pos", col = "cs_coverage_0.95"), size = 10
  ) +
  # Single theme()/ylab() call: an earlier pair setting the same elements was
  # fully overridden by these values and has been dropped.
  theme(
    text = element_text(size = 40),
    strip.text.y = element_text(size = 15, angle = 0.5),
    axis.text.x = element_text(size = 40),
    axis.title.x = element_text(size = 40)
  ) +
  xlab("Position") +
  ylab("Estimated\neffect") +
  geom_segment(
    arrow = arrow(length = unit(1, "cm")),
    aes(x = gene_start, xend = gene_end, y = 1, yend = 1),
    size = 6,
    data = tar_gene_info$gene_info,
    alpha = 0.3
  ) +
  geom_text(
    aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name),
    size = 10,
    data = tar_gene_info$gene_info
  ) +
  # AD rows of the flatten table in red. The faceting columns are removed from
  # this layer's data (presumably so the points are repeated in every facet --
  # confirm against the intended figure).
  geom_point(
    aes(x = pos, y = pip),
    color = "red",
    data = flatten_table %>%
      filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
      mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
      select(-study, -region, -cs_coverage_0.95)
  )

# Fine-mapping contexts saved by section 1 (read once), then passed through
# get_norosmap_contexts() before being used as the cohort list.
finempping_contexts <- readRDS(paste0(gene_name, '_finemapping_contexts.rds')) # from sec1
finempping_contexts <- get_norosmap_contexts(finempping_contexts)

# ColocBoost plot with add_QTL = TRUE, using the contexts above as cohorts.
cb_contexts <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  cohorts = finempping_contexts,
  gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# q-values from the interaction association analysis, one point per variant.
# NOTE(review): WDR12_int_res is not assigned in the visible code; confirm its origin.
# Changes from the previous version of this cell:
#   * the x axis shows variant_id, so it is labelled "Variant" (was "Gene Name");
#   * the title typo "nalysis" is corrected;
#   * the unused `size` label and the no-op `legend.position = NULL` are
#     dropped (no aesthetic in this plot produces a legend);
#   * the plot is stored and passed to ggsave() explicitly instead of relying
#     on the implicit "last plot".
interaction_p <- ggplot(WDR12_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(
    title = "qvalue for WDR12 csets in interaction association analysis",
    x = "Variant",
    y = "qvalue_interaction"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(color = "grey80"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  ylim(0, 1)
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))
ggsave('plots/WDR12/sec11.interaction_association_WDR12_lessPIP25.pdf', interaction_p, height = 5, width = 8)
interaction_p

# Display vars_p.
# NOTE(review): vars_p is not assigned in the visible code; confirm its origin.
vars_p

# Display apoe_p.
# NOTE(review): apoe_p is not assigned in the visible code; confirm its origin.
apoe_p

options(repr.plot.width = 12, repr.plot.height = 6)

# PIP per gene for trans fine-mapped signals; skipped with a message when
# flat_var is NULL.
# NOTE(review): flat_var is not assigned in the visible code; confirm its origin.
if (!is.null(flat_var)) {
  p <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
    geom_point(alpha = 0.7) +
    # The former `color` label is dropped: no colour aesthetic is mapped, so it
    # was never shown.
    labs(
      title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
      x = "Gene Name",
      y = "PIP",
      size = "PIP"
    ) +
    theme_minimal(base_size = 14) +
    theme(
      panel.background = element_blank(),
      panel.grid.major = element_line(color = "grey80"),
      # NOTE(review): NULL does not hide the legend; if hiding the size legend
      # was the intent, this should be "none" -- confirm.
      legend.position = NULL,
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
    # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))
  # Directory and file name are both derived from gene_name (the directory was
  # previously hard-coded to one gene while the file name was not).
  ggsave(
    paste0('plots/', gene_name, '/sec12.trans_fine_mapping_', gene_name, '.pdf'),
    p, height = 5, width = 8
  )
  p
} else {
  message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000138442	chr2	201874261	204014798	203014797	chr2:201677542-203951659,chr2:203951659-207254319	2_201677542-203951659,2_203951659-207254319	2_201677542-203951659,2_203951659_207254319	TADB_217,TADB_218,TADB_219	chr2_199844239_203567751,chr2_201265735_204961577,chr2_202936762_207159509	203014798	202874261	chr2:196447744-202229406,chr2:196804592-203567751,chr2:198867396-204961577,chr2:199844239-207159509,chr2:201265735-209854503,chr2:202936762-211266074	WDR12

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000138442	chr2	201874261	204014798	203014797	chr2:201677542-203951659,chr2:203951659-207254319	2_201677542-203951659,2_203951659-207254319	2_201677542-203951659,2_203951659_207254319	TADB_217,TADB_218,TADB_219	chr2_199844239_203567751,chr2_201265735_204961577,chr2_202936762_207159509	203014798	202874261	chr2:196447744-202229406,chr2:196804592-203567751,chr2:198867396-204961577,chr2:199844239-207159509,chr2:201265735-209854503,chr2:202936762-211266074	WDR12

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
DLPFC; AC; PCC	1.0000000	2	0.49953514	3873; 4215	chr2:202906576:G:A; chr2:203001075:CGTGTGTGTGTGT:TGTGTGTGTGTGT	chr2:202906576:G:A	coloc_sets:Y7_Y8_Y9:CS1
Exc; DLPFC; AC; PCC; PCC_productive	0.9567232	6	0.27074360	3599; 3650; 3608; 3627; 3906; 4061	chr2:202841064:G:C; chr2:202856022:G:A; chr2:202843584:A:G; chr2:202916162:TTTTATTTATTTA:TTTTATTTA; chr2:202850250:G:C; chr2:202959815:CAA:CA	chr2:202841064:G:C	coloc_sets:Y5_Y7_Y8_Y9_Y16:CS3
DLPFC; AC; PCC	0.8976547	36	0.06094257	3544; 3554; 3574; 3630; 3632; 3727; 3779; 3829; 3864; 3931; 3769; 3912; 3547; 3708; 3774; 3909; 3753; 3903; 3546; 3954; 3839; 4262; 4196; 4225; 4253; 4254; 4261; 4324; 3970; 4033; 4038; 4082; 4110; 4145; 3840; 4367	chr2:202825664:G:A; chr2:202828836:C:G; chr2:202834693:G:T; chr2:202850789:C:T; chr2:202850905:G:T; chr2:202874289:C:T; chr2:202886008:C:G; chr2:202896851:A:G; chr2:202903616:C:T; chr2:202928093:C:T; chr2:202916348:C:T; chr2:202918486:T:C; chr2:202898078:CA:C; chr2:202826558:A:G; chr2:202827287:A:G; chr2:202867547:T:C; chr2:202884669:C:CTTT; chr2:202878969:A:G; chr2:202883871:A:G; chr2:203018272:C:CT; chr2:202914796:C:A; chr2:202898079:AC:CC; chr2:203056547:A:G; chr2:202996143:G:C; chr2:203003634:T:A; chr2:203013429:C:T; chr2:203014780:A:G; chr2:203017975:C:A; chr2:202935324:TTATATA:TTA; chr2:203040452:G:T; chr2:202940311:A:G; chr2:202953852:A:G; chr2:202955052:A:T; chr2:202964301:T:A; chr2:202969678:C:G; chr2:202980923:T:C	chr2:202825664:G:A	coloc_sets:Y7_Y8_Y9:CS2

	variants	DLPFC	AC	PCC
	<chr>	<dbl>	<dbl>	<dbl>
chr2:202906576:G:A	chr2:202906576:G:A	-3.935098	-6.13314	-6.250894
chr2:203001075:CGTGTGTGTGTGT:TGTGTGTGTGTGT	chr2:203001075:CGTGTGTGTGTGT:TGTGTGTGTGTGT	-3.935098	-6.13314	-6.250894

	variants	Exc	DLPFC	AC	PCC	PCC_productive
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
chr2:202841064:G:C	chr2:202841064:G:C	6.468868	11.41625	8.801522	5.823800	5.651822
chr2:202856022:G:A	chr2:202856022:G:A	6.468868	11.41625	8.801522	5.823800	5.651822
chr2:202843584:A:G	chr2:202843584:A:G	6.559777	11.24594	8.684280	5.823800	5.651822
chr2:202850250:G:C	chr2:202850250:G:C	6.674872	11.32890	8.712186	5.680194	5.572117
chr2:202916162:TTTTATTTATTTA:TTTTATTTA	chr2:202916162:TTTTATTTATTTA:TTTTATTTA	6.906601	11.13219	8.649516	5.802465	5.120113
chr2:202959815:CAA:CA	chr2:202959815:CAA:CA	6.586078	11.01812	8.525877	5.616777	5.637198

Case study: WDR12 xQTL and AD GWAS¶

Overview¶

Computing environment setup¶

How to Use This Notebook¶

Section 0: Sanity check ¶

Check the basic information of the gene¶

Check the existing results which are inputs to this analysis¶

Check the existing results which are inputs to this analysis¶

Section 1: Fine-mapping for xQTL and GWAS ¶

Section 2: Multi-context colocalization with Bellenguez 2022 ¶

Section 3: Refinement of colocalized loci with other AD GWAS ¶

Section 4: Assessment of multi-context xQTL effect sizes ¶

Option 1: ColocBoost + MASH¶

Option 2: mvSuSiE¶

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)¶

TWAS results¶

MR results¶

cTWAS results¶

Section 6: Context specific multi-gene fine-mapping ¶

A quick analysis: using the xQTL-AD summary table (flatten table)¶

A statistically solid approach: mvSuSiE multi-gene analysis¶

Section 7: Epigenomic QTL and their target regions ¶

Generate a crude plot to determine whether the story is interesting¶

Section 8: Context focused validation in other xQTL data ¶

Section 9: Non-linear effects of xQTL ¶

APOE interaction¶

Section 10: in silico functional studies in iPSC model ¶

Section 11: Functional annotations of selected loci ¶

Section 12: Candidate loci as trans-xQTL ¶

Creative thinking: generate hypothesis, search in literature, raise questions to discuss¶

coloc_csets_1	coloc_csets_2	min_abs_cor	max_abs_cor	median_abs_cor
coloc_sets:Y7_Y8_Y9:CS1	coloc_sets:Y5_Y7_Y8_Y9_Y16:CS3	0.0466648729700563	0.050175603913272	0.0476588716079472
coloc_sets:Y7_Y8_Y9:CS1	coloc_sets:Y7_Y8_Y9:CS2	0.042877456385991	0.0484915152410162	0.0476704953046012
coloc_sets:Y5_Y7_Y8_Y9_Y16:CS3	coloc_sets:Y7_Y8_Y9:CS2	0.469795574415134	0.522660476572075	0.508064572013709

A data.frame: 36 x 4
	variants	DLPFC	AC	PCC
	<chr>	<dbl>	<dbl>	<dbl>
chr2:202825664:G:A	chr2:202825664:G:A	8.610602	8.759280	5.897966
chr2:202828836:C:G	chr2:202828836:C:G	8.610602	8.759280	5.897966
chr2:202834693:G:T	chr2:202834693:G:T	8.610602	8.759280	5.897966
chr2:202850789:C:T	chr2:202850789:C:T	8.610602	8.759280	5.897966
chr2:202850905:G:T	chr2:202850905:G:T	8.610602	8.759280	5.897966
chr2:202874289:C:T	chr2:202874289:C:T	8.610602	8.759280	5.897966
chr2:202886008:C:G	chr2:202886008:C:G	8.610602	8.759280	5.897966
chr2:202896851:A:G	chr2:202896851:A:G	8.610602	8.759280	5.897966
chr2:202903616:C:T	chr2:202903616:C:T	8.610602	8.759280	5.897966
chr2:202928093:C:T	chr2:202928093:C:T	8.610602	8.759280	5.897966
chr2:202883871:A:G	chr2:202883871:A:G	8.698313	8.592295	5.845422
chr2:202918486:T:C	chr2:202918486:T:C	8.639583	8.631226	5.845422
chr2:202827287:A:G	chr2:202827287:A:G	8.563309	8.646358	5.845422
chr2:202867547:T:C	chr2:202867547:T:C	8.563309	8.646358	5.845422
chr2:202884669:C:CTTT	chr2:202884669:C:CTTT	8.563309	8.646358	5.845422
chr2:202916348:C:T	chr2:202916348:C:T	8.459777	8.443372	5.959266
chr2:202878969:A:G	chr2:202878969:A:G	8.563887	8.548605	5.840398
chr2:202914796:C:A	chr2:202914796:C:A	8.435272	8.287633	5.937400
chr2:202826558:A:G	chr2:202826558:A:G	8.382462	8.646358	5.845422
chr2:202935324:TTATATA:TTA	chr2:202935324:TTATATA:TTA	8.539323	8.587844	5.581173
chr2:202898078:CA:C	chr2:202898078:CA:C	8.334125	8.515161	5.896910
chr2:203018272:C:CT	chr2:203018272:C:CT	8.112531	8.739257	5.670492
chr2:202996143:G:C	chr2:202996143:G:C	8.274524	8.651068	5.679455
chr2:203003634:T:A	chr2:203003634:T:A	8.260394	8.651068	5.679455
chr2:203013429:C:T	chr2:203013429:C:T	8.260394	8.651068	5.679455
chr2:203014780:A:G	chr2:203014780:A:G	8.260394	8.651068	5.679455
chr2:203017975:C:A	chr2:203017975:C:A	8.260394	8.651068	5.679455
chr2:203040452:G:T	chr2:203040452:G:T	8.260394	8.651068	5.679455
chr2:202940311:A:G	chr2:202940311:A:G	8.464034	8.617185	5.615000
chr2:202953852:A:G	chr2:202953852:A:G	8.464034	8.617185	5.615000
chr2:202955052:A:T	chr2:202955052:A:T	8.464034	8.617185	5.615000
chr2:202964301:T:A	chr2:202964301:T:A	8.464034	8.617185	5.615000
chr2:202969678:C:G	chr2:202969678:C:G	8.464034	8.617185	5.615000
chr2:202980923:T:C	chr2:202980923:T:C	8.464034	8.617185	5.615000
chr2:202898079:AC:CC	chr2:202898079:AC:CC	-7.989343	-8.357625	-5.896910
chr2:203056547:A:G	chr2:203056547:A:G	7.302074	8.645431	5.679455

A data.frame: 18 x 6
gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000003393	chr2	201782111	201782112	ALS2	BM_22_MSBB_eQTL,ROSMAP_AC_sQTL
ENSG00000055044	chr2	202265735	202265736	NOP58	MiGA_GTS_eQTL,MiGA_THA_eQTL,ROSMAP_PCC_sQTL
ENSG00000064012	chr2	201233442	201233443	CASP8	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000115993	chr2	201451499	201451500	TRAK2	Knight_eQTL,ROSMAP_AC_sQTL
ENSG00000116030	chr2	202238598	202238599	SUMO1	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000118257	chr2	205681989	205681990	NRP2	DLPFC_Bennett_pQTL
ENSG00000138380	chr2	202912213	202912214	CARF	AC_DeJager_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000138439	chr2	202634968	202634969	FAM117B	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000138443	chr2	203328279	203328280	ABI2	STARNET_eQTL
ENSG00000144426	chr2	203014878	203014879	NBEAL1	AC_DeJager_eQTL,Exc_mega_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000155749	chr2	201357397	201357398	FLACC1	ROSMAP_DLPFC_sQTL
ENSG00000155755	chr2	201643569	201643570	TMEM237	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000163596	chr2	202871765	202871766	ICA1L	Ast_DeJager_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000163599	chr2	203867770	203867771	CTLA4	MiGA_SVZ_eQTL
ENSG00000173166	chr2	203535334	203535335	RAPH1	MSBB_BM36_pQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,STARNET_eQTL
ENSG00000182329	chr2	202073254	202073255	KIAA2012	ROSMAP_AC_sQTL
ENSG00000196290	chr2	200889326	200889327	NIF3L1	MiGA_SVZ_eQTL
ENSG00000204217	chr2	202376326	202376327	BMPR2	MiGA_GTS_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL