micromamba install -n r_libs r-pecotmr

# Load the project's plotting and utility helpers.
# If an error occurs while sourcing these scripts, it might be because a get()
# call returned NULL. Restart the kernel (or click the R kernel in the upper
# right corner) and run the cell again.
source('../../codes/cb_plot.R')
source('../../codes/utilis.R')

# Source every ColocBoost R script. `pattern` is a regular expression, so the
# dot is escaped and the match is anchored at the end of the name; the previous
# ".R" also matched names such as "notes.Rmd" or "cache.RData".
for (file in list.files("/data/colocalization/colocboost/R", pattern = "\\.R$", full.names = TRUE)) {
  source(file)
}
# Gene under study.
gene_name <- 'ACE'

# Create the figure directory for this gene. The existence check keeps reruns
# quiet while still letting a genuine creation failure raise its warning.
plot_dir <- file.path('plots', gene_name)
if (!dir.exists(plot_dir)) {
  dir.create(plot_dir, recursive = TRUE)
}

# Look up the gene record once and display it (the identical lookup and
# display used to be repeated twice in a row).
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Identifiers reused by the later sections.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# Source the utility helpers again (presumably to pick up edits made since the
# first load — TODO confirm), then call expression_in_rosmap_bulk() on the
# target gene record.
source('../../codes/utilis.R')
expression_in_rosmap_bulk(target_gene_info)

# Display `region_p`. It is not defined in the visible part of this file.
region_p

# Display `pip_p`. It is not defined in the visible part of this file.
pip_p

# Load the stored ColocBoost result for the target gene.
cb_res <- readRDS(paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds"))

# Build the ColocBoost summary table and save it next to the notebook.
cb_res_table <- get_cb_summary(cb_res)
saveRDS(cb_res_table, paste0(gene_name, "_colocboost_res.rds"))

cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Replay the recorded plot into a PDF. The path is built from gene_name so it
# follows the 'plots/<gene_name>' directory created earlier instead of being
# hard-coded to 'plots/ACE/'.
pdf(file.path('plots', gene_name, 'sec2.colocboost_res.pdf'), width = 10, height = 5)
replayPlot(cb$p)
dev.off()

# colocalized variants
cb_res_table

# effect sign for each coloc sets
get_effect_sign_csets(cb_res)

# LD between coloc sets
get_between_purity_simple(cb_res, gene.name = gene_id, path = '/data/colocalization/QTL_data/eQTL/')

# AD GWAS study names handed to plot_cb() as `cohorts`, with add_gwas = TRUE.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)

# Same ColocBoost plot call as for the base figure, plus the GWAS cohorts.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Replay the recorded all-AD plot into a PDF. The path is built from gene_name
# so it follows the 'plots/<gene_name>' directory created earlier instead of
# being hard-coded to 'plots/ACE/'.
pdf(file.path('plots', gene_name, 'sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
replayPlot(cb_ad$p)
dev.off()

# Build the MASH plots for the gene under study. The call used to pass the
# literal 'ACE', which ignored any change to gene_name.
mash_p <- mash_plot(gene_name = gene_name)

options(repr.plot.width = 10, repr.plot.height = 10)

# Values are not auto-printed inside a loop, so each element is printed
# explicitly.
for (mash_p_tmp in mash_p) {
  print(mash_p_tmp)
}

plot_TWAS_res(gene_id = gene_id, gene_name = gene_name)

# The export file name is derived from gene_name (it used to be hard-coded to
# the ACE export, so changing gene_name silently kept loading ACE data).
multigene_flat <- get_multigene_multicontext_flatten(
  paste0('Fungen_xQTL_allQTL.overlapped.gwas.export.', gene_name, '.rds'),
  sQTL = 'no_MSBB'
)
multigene_flat

# Split the comma-separated sliding-window field into one entry per window.
# fixed = TRUE treats ',' as a literal separator rather than a regex.
sliding_windows <- as.character(
  unlist(strsplit(target_gene_info$gene_info$sliding_windows, ',', fixed = TRUE))
)
sliding_windows

# Collect the multi-gene mvSuSiE result of every sliding window that has a
# readable result file, plotting those whose condition names include the
# target gene.
mnm_gene <- list()
for (window in sliding_windows) {
  mnm_path <- paste0('/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.', window, '.mnm.rds')

  # Best effort: a window whose result cannot be read is skipped.
  mnm_gene_tmp <- tryCatch(readRDS(mnm_path), error = function(e) NULL)
  if (is.null(mnm_gene_tmp)) {
    next
  }

  if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
    tryCatch({
      p <- mvsusieR::mvsusie_plot(mnm_gene_tmp$mvsusie_fitted, sentinel_only = FALSE, add_cs = TRUE)
      print(p)  # This ensures the plot is displayed in JupyterLab
    }, error = function(e) {
      # Keep going, but report the failure instead of hiding it.
      message('Could not plot mnm result for sliding window ', window, ': ', conditionMessage(e))
    })
  } else {
    message('There is mnm result for sliding window ',window,', but not include target gene ', gene_name, ' in CS')
  }

  # Store the whole result as a single new list element.
  mnm_gene[[length(mnm_gene) + 1L]] <- mnm_gene_tmp
}

# Enlarge the notebook plot area for the faceted figure below.
options(repr.plot.width = 40, repr.plot.height = 40)

# Faceted figure with one row per cs_coverage_0.95 + study + region
# combination; the labeller replaces "_", ":", "," and "-" in the strip text
# with line breaks.
# `effect_of_interest`, `flatten_table` and `tar_gene_info` are not defined in
# the visible part of this file.
# NOTE(review): `tar_gene_info` may be meant to be `target_gene_info`, the name
# used elsewhere in this file — confirm.
# NOTE(review): `scale = "free_y"` relies on partial matching of facet_grid()'s
# `scales` argument — confirm and spell it out.
 ggplot() + theme_bw() + facet_grid(cs_coverage_0.95 + study + region ~ ., labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)), scale = "free_y") +

      # NOTE(review): angle = 0.5 rotates the strip text by half a degree;
      # presumably 0 was intended — confirm.
      theme(text = element_text(size = 20), strip.text.y = element_text(size = 25, angle = 0.5)) +
     # xlim(view_win) +
      ylab("Estimated effect") +
   #   geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
    #            aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  # Line of `fun_plot` against `x`, coloured by cs_coverage_0.95.
  geom_line(data = effect_of_interest ,
                aes_string(y = "fun_plot", x = "x", col = "cs_coverage_0.95"), size = 2) +  
    # Points of `pip` against `pos` from the same data, same colouring.
    geom_point(data = effect_of_interest ,
                aes_string(y = "pip", x = "pos", col = "cs_coverage_0.95"), size = 10) +
    # NOTE(review): this theme() and the ylab() below repeat settings made
    # earlier in the chain; presumably the later values win — confirm and
    # drop the earlier ones.
    theme(text = element_text(size = 40), strip.text.y = element_text(size = 15, angle = 0.5), 
            axis.text.x = element_text(size = 40), axis.title.x = element_text(size = 40)) +
      xlab("Position") +
      ylab("Estimated\neffect") +
      # Arrow from gene_start to gene_end at y = 1, with the gene name as a
      # label at the midpoint.
      geom_segment(arrow = arrow(length = unit(1, "cm")), aes(x = gene_start, xend = gene_end, y = 1, yend = 1), size = 6,
                  data = tar_gene_info$gene_info, alpha = 0.3) +
      geom_text(aes(x = (gene_start + gene_end) / 2, y = 1 , label = gene_name), size = 10, 
              data = tar_gene_info$gene_info)+
        # Red points for flatten_table rows whose study contains "AD_" and
        # whose cs_coverage_0.95 is not 0. The study, region and
        # cs_coverage_0.95 columns are dropped from this layer's data.
        geom_point(aes(x = pos, y = pip  ) ,color = "red", data = flatten_table%>%filter( str_detect(study,"AD_") , cs_coverage_0.95 != 0  )%>%mutate(AD_study = study%>%str_replace_all("_","\n" ))%>%select(-study,-region,-cs_coverage_0.95) )

# Load the fine-mapping contexts written by section 1. The file used to be
# read twice in a row; a single read gives the same object.
finempping_contexts <- readRDS(paste0(gene_name, '_finemapping_contexts.rds')) # from sec1

# Pass the contexts through get_norosmap_contexts() before plotting.
finempping_contexts <- get_norosmap_contexts(finempping_contexts)

# ColocBoost plot with the resulting contexts supplied as `cohorts` and
# add_QTL = TRUE.
cb_contexts <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2, add_QTL = TRUE, cohorts = finempping_contexts, gene_id = gene_id)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# Interaction q-value of each variant in ACE_int_res, on a fixed 0-1 y axis.
# Fixes relative to the previous version: the title typo ("nalysis"), the x
# axis label (x is variant_id, not a gene name), and removal of a `size`
# label and a `legend.position = NULL` setting that had no effect here.
interaction_p <- ggplot(ACE_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(title = "qvalue for ACE csets in interaction association analysis",
       x = "Variant ID",
       y = "qvalue_interaction") +
  theme_minimal(base_size = 14) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey80"),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 1)

# Display the plot in the notebook.
interaction_p

# Save this exact plot rather than whichever plot was created last.
ggsave('plots/ACE/sec11.interaction_association_ACE_lessPIP25.pdf', plot = interaction_p, height = 5, width = 8)

# Display `vars_p`. It is not defined in the visible part of this file.
vars_p

# Display `apoe_p`. It is not defined in the visible part of this file.
apoe_p

options(repr.plot.width = 12, repr.plot.height = 6)

# Plot and save the PIP of trans fine-mapped genes when `flat_var` holds any
# results; otherwise only report that nothing was found.
if (!is.null(flat_var)) {
  # Inside aes(), `gene_name` is looked up in flat_var first (presumably a
  # column of that table — confirm); in the title and file path it is the
  # script-level gene under study.
  p <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
    geom_point(alpha = 0.7) +
    labs(title = paste0("PIP values for trans fine mapped Genes in ", gene_name ," csets with AD"),
         x = "Gene Name",
         y = "PIP",
         size = "PIP",
         color = "CS Coverage 0.95 Min Corr") +
    theme_minimal(base_size = 14) +
    # NOTE(review): legend.position = NULL leaves the legend setting
    # unchanged; if the legend was meant to be hidden use "none" — confirm.
    theme(panel.background = element_blank(),
          panel.grid.major = element_line(color = "grey80"),
          legend.position = NULL,
          axis.text.x = element_text(angle = 45, hjust = 1))
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

  # The directory is derived from gene_name, like the file name, instead of
  # being hard-coded to 'plots/ACE/'.
  ggsave(file.path('plots', gene_name, paste0('sec12.trans_fine_mapping_', gene_name, '.pdf')),
         plot = p, height = 5, width = 8)
  p
} else {
  message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000159640	chr17	62240000	64498380	63477060	chr17:60570445-65149278	17_60570445-65149278	17_60570445-65149278	TADB_1204,TADB_1205,TADB_1206	chr17_59677452_63615481,chr17_61231470_64931385,chr17_62832081_65919480	63477061	63498380	chr17:57192725-62392838,chr17:57982638-63615481,chr17:58565557-64931385,chr17:59677452-65919480,chr17:61231470-67256525,chr17:62832081-69046854,chr17:63747264-70327244	ACE

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000159640	chr17	62240000	64498380	63477060	chr17:60570445-65149278	17_60570445-65149278	17_60570445-65149278	TADB_1204,TADB_1205,TADB_1206	chr17_59677452_63615481,chr17_61231470_64931385,chr17_62832081_65919480	63477061	63498380	chr17:57192725-62392838,chr17:57982638-63615481,chr17:58565557-64931385,chr17:59677452-65919480,chr17:61231470-67256525,chr17:62832081-69046854,chr17:63747264-70327244	ACE

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
Exc; DLPFC; PCC; pQTL; AD_Bellenguez_2022	0.9928046	2	0.8495075	4218; 4223	chr17:63476980:C:T; chr17:63478937:C:G	chr17:63476980:C:T	coloc_sets:Y1_Y2_Y4_Y6_Y13:CS1

	variants	Exc	DLPFC	PCC	pQTL	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
chr17:63476980:C:T	chr17:63476980:C:T	-5.617463	-12.08937	-15.52633	-6.902631	8.190476
chr17:63478937:C:G	chr17:63478937:C:G	-5.359208	-11.90218	-15.41718	-7.008341	8.107143

gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000008283	chr17	63446353	63446354	CYB561	BM_22_MSBB_eQTL,PCC_DeJager_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000011028	chr17	62627669	62627670	MRC2	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000087191	chr17	63827151	63827152	PSMC5	MiGA_GTS_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000087995	chr17	62423890	62423891	METTL2A	BM_10_MSBB_eQTL,ROSMAP_PCC_sQTL
ENSG00000108370	chr17	65100811	65100812	RGS9	ROSMAP_DLPFC_sQTL
ENSG00000108510	chr17	62065277	62065278	MED13	MiGA_GTS_eQTL
ENSG00000108588	chr17	63776350	63776351	CCDC47	MiGA_SVZ_eQTL
ENSG00000108592	chr17	63830011	63830012	FTSJ3	MiGA_GFM_eQTL,BM_10_MSBB_eQTL,BM_22_MSBB_eQTL,BM_36_MSBB_eQTL,BM_44_MSBB_eQTL,DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000108604	chr17	63843064	63843065	SMARCD2	MiGA_SVZ_eQTL,BM_22_MSBB_eQTL,PCC_DeJager_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000108622	chr17	64020633	64020634	ICAM2	MiGA_GTS_eQTL
ENSG00000108654	chr17	64508198	64508199	DDX5	Mic_13_Kellis_eQTL,Inh_mega_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000108854	chr17	64662306	64662307	SMURF2	ROSMAP_PCC_sQTL
ENSG00000125695	chr17	63752096	63752097	AC046185.1	BM_10_MSBB_eQTL
ENSG00000136463	chr17	63600894	63600895	TACO1	MiGA_SVZ_eQTL
ENSG00000136478	chr17	64263259	64263260	TEX2	Inh_Kellis_eQTL,Oli_mega_eQTL,ROSMAP_AC_sQTL
ENSG00000136485	chr17	63550476	63550477	DCAF7	Inh_DeJager_eQTL
ENSG00000136490	chr17	63701171	63701172	LIMD2	MiGA_GTS_eQTL,MiGA_SVZ_eQTL
ENSG00000136492	chr17	61863527	61863528	BRIP1	MiGA_GTS_eQTL,MiGA_SVZ_eQTL
ENSG00000141376	chr17	60677452	60677453	BCAS3	ROSMAP_DLPFC_sQTL
ENSG00000146872	chr17	62458657	62458658	TLK2	MiGA_SVZ_eQTL
ENSG00000170921	chr17	63009555	63009556	TANC2	OPC_Kellis_eQTL,Ast_10_Kellis_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000173826	chr17	63523333	63523334	KCNH6	ROSMAP_DLPFC_sQTL
ENSG00000176809	chr17	64919479	64919480	LRRC37A3	MiGA_SVZ_eQTL,Inh_Kellis_eQTL
ENSG00000178607	chr17	64130818	64130819	ERN1	Exc_DeJager_eQTL,ROSMAP_PCC_sQTL
ENSG00000198231	chr17	63773602	63773603	DDX42	MiGA_GFM_eQTL,MiGA_THA_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000198909	chr17	63622414	63622415	MAP3K3	Oli_DeJager_eQTL
ENSG00000224383	chr17	63998350	63998351	PRR29	DLPFC_DeJager_eQTL,AC_DeJager_eQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000253506	chr17	61591218	61591219	NACA2	MiGA_GTS_eQTL
ENSG00000261371	chr17	64413775	64413776	PECAM1	MiGA_SVZ_eQTL
ENSG00000264813	chr17	63484822	63484823	AC113554.1	BM_44_MSBB_eQTL,DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,ROSMAP_AC_sQTL
ENSG00000266173	chr17	63741985	63741986	STRADA	MiGA_GTS_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000271605	chr17	64449036	64449037	MILR1	monocyte_ROSMAP_eQTL

Case study: ACE xQTL and AD GWAS¶

Overview¶

Computing environment setup¶

How to Use This Notebook¶

Section 0: Sanity check ¶

Check the basic information of the gene¶

Check the existing results which are inputs to this analysis¶

Check the existing results which are inputs to this analysis¶

Section 1: Fine-mapping for xQTL and GWAS ¶

Section 2: Multi-context colocalization with Bellenguez 2022 ¶

Section 3: Refinement of colocalized loci with other AD GWAS ¶

Section 4: Assessment of multi-context xQTL effect sizes ¶

Option 1: ColocBoost + MASH¶

Option 2: mvSuSiE¶

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)¶

TWAS results¶

MR results¶

cTWAS results¶

Section 6: Context specific multi-gene fine-mapping ¶

A quick analysis: using the xQTL-AD summary table (flatten table)¶

A statistically solid approach: mvSuSiE multi-gene analysis¶

Section 7: Epigenomic QTL and their target regions ¶

Generate a crude plot to determine whether the story is interesting¶

Section 8: Context focused validation in other xQTL data ¶

Section 9: Non-linear effects of xQTL ¶

APOE interaction¶

Section 10: in silico functional studies in iPSC model ¶

Section 11: Functional annotations of selected loci ¶

Section 12: Candidate loci as trans-xQTL ¶

Creative thinking: generate hypothesis, search in literature, raise questions to discuss¶