micromamba install -n r_libs r-pecotmr

# Load the project's plotting and utility helpers.
# If an error occurs while sourcing scripts, it might be because a get() call
# returned NULL. Restart the kernel (or click the R kernel in the upper right
# corner) to resolve the issue.
source('../../codes/cb_plot.R')
source('../../codes/utilis.R')

# Source every ColocBoost R script in the directory. The dot is escaped and the
# pattern is anchored so only file names ending in ".R" are matched; a bare
# ".R" is a regular expression meaning "any character followed by R" and would
# also match names such as "notes.Rmd".
for (file in list.files("/data/colocalization/colocboost/R", pattern = "\\.R$", full.names = TRUE)) {
  source(file)
}
# Gene analysed in this notebook; also used to name the plot output directory.
gene_name <- 'CTSH'

# Create the per-gene plot directory only when it is missing, so re-running the
# cell does not emit an "already exists" warning.
plot_dir <- paste0('plots/', gene_name)
if (!dir.exists(plot_dir)) {
  dir.create(plot_dir, recursive = TRUE)
}

# Look up the gene record with the get_gene_info() helper and display it.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Identifiers read from the gene record: the region_id and `#chr` columns.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# Re-source the utility helpers, then call expression_in_rosmap_bulk() on the
# gene record.
# NOTE(review): utilis.R is already sourced near the top of this file; this
# second call looks redundant unless the helper file was edited in between — confirm.
source('../../codes/utilis.R')
expression_in_rosmap_bulk(target_gene_info)

# Display `region_p`. It is not assigned anywhere in the visible code.
# NOTE(review): presumably a plot object produced by an earlier section — confirm.
region_p

# Display `pip_p`. Likewise not assigned in the visible code — TODO confirm its origin.
pip_p

# Load the ColocBoost result for the target gene (read once).
cb_res <- readRDS(paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds"))

# Build the ColocBoost plot with the plot_cb() helper.
cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Replay the recorded plot (cb$p) into a PDF. The output directory is derived
# from gene_name rather than a hard-coded gene symbol, so it always matches the
# 'plots/<gene_name>' directory this notebook creates.
pdf(file.path('plots', gene_name, 'sec2.colocboost_res.pdf'), width = 10, height = 5)
replayPlot(cb$p)
dev.off()

# colocalized variants
# Displays `cb_res_table`, which is not assigned in the visible code.
# NOTE(review): presumably created by plot_cb() or a sourced helper — confirm.
cb_res_table

# effect sign for each coloc sets
# Calls the get_effect_sign_csets() helper on the loaded ColocBoost result and
# displays whatever it returns.
get_effect_sign_csets(cb_res)

# LD between coloc sets
# Note that the region id (gene_id) is what gets passed as `gene.name`, along
# with the eQTL data directory as `path`.
get_between_purity_simple(cb_res, gene.name = gene_id, path = '/data/colocalization/QTL_data/eQTL/')

# AD GWAS study names passed to plot_cb() as `cohorts`.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)

# ColocBoost plot with add_gwas = TRUE for the studies listed above.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Replay the recorded AD GWAS plot (cb_ad$p) into a PDF. The output directory
# is derived from gene_name rather than a hard-coded gene symbol, so it always
# matches the 'plots/<gene_name>' directory this notebook creates.
pdf(file.path('plots', gene_name, 'sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
replayPlot(cb_ad$p)
dev.off()

# Build the MASH plots with the mash_plot() helper. gene_name is passed instead
# of a repeated 'CTSH' literal so this cell follows the gene chosen earlier.
mash_p <- mash_plot(gene_name = gene_name)

# Enlarge the notebook plotting area before printing.
options(repr.plot.width = 10, repr.plot.height = 10)

# Print each element of mash_p in turn; explicit print() is needed because
# values are not auto-printed inside a for loop.
for (mash_p_tmp in mash_p) {
  print(mash_p_tmp)
}

message("Multi context in ROSMAP data")

# Load the ROSMAP multi-context result. On failure the handler reports the
# underlying error text and returns NULL explicitly, instead of relying on
# message() happening to return NULL.
multi_context_rosmap_tmp <- tryCatch(
  readRDS(paste0('/data/analysis_result/multi_context/ROSMAP/mnm/ROSMAP_DeJager.',
                 target_gene_info$gene_info$`#chr`, '_', gene_id, '.multicontext_bvsr.rds')),
  error = function(e) {
    message('Error in loading ROSMAP multi context data: ', conditionMessage(e))
    NULL
  }
)
# Plot only when a first element with a fitted object exists. The length check
# keeps an empty list from raising "subscript out of bounds".
if (length(multi_context_rosmap_tmp) > 0 &&
    !is.null(multi_context_rosmap_tmp[[1]]$mvsusie_fitted)) {
  plot_and_save(multi_context_rosmap_tmp[[1]],
                file.path('plots', gene_name, 'sec4.multi_context_ROSMAP'))
} else {
  message('Multi Context results are empty in ROSMAP data')
}

# Load and process MSBB data
message("Multi context in MSBB data")

# Same loading and guarding logic as for ROSMAP, applied to the MSBB file.
multi_context_msbb_tmp <- tryCatch(
  readRDS(paste0('/data/analysis_result/multi_context/MSBB/mnm/MSBB_eQTL.',
                 target_gene_info$gene_info$`#chr`, '_', gene_id, '.multicontext_bvsr.rds')),
  error = function(e) {
    message('Error in loading MSBB multi context data: ', conditionMessage(e))
    NULL
  }
)
if (length(multi_context_msbb_tmp) > 0 &&
    !is.null(multi_context_msbb_tmp[[1]]$mvsusie_fitted)) {
  plot_and_save(multi_context_msbb_tmp[[1]],
                file.path('plots', gene_name, 'sec4.multi_context_MSBB'))
} else {
  message('Multi Context results are empty in MSBB data')
}

# TWAS results for the target gene, via the plot_TWAS_res() helper.
plot_TWAS_res(gene_id = gene_id, gene_name = gene_name)

# Flattened multi-gene / multi-context table built from the exported file.
multigene_flat <- get_multigene_multicontext_flatten(
  'Fungen_xQTL_allQTL.overlapped.gwas.export.CTSH.rds',
  sQTL = 'no_MSBB'
)
multigene_flat

# Split the comma-separated TADB_id field into a character vector of windows.
tadb_ids <- strsplit(target_gene_info$gene_info$TADB_id, ',')
sliding_windows <- as.character(unlist(tadb_ids))
sliding_windows

# Region id of the target gene, shown for reference.
target_gene_info$gene_info$region_id

# Load one multi-gene result file and show the condition names of its first element.
mnm_gene_tmp <- readRDS(
  '/data/analysis_result/multi_gene/ROSMAP/mnm_genes//ROSMAP_AC_DeJager_eQTL.chr15_76894683_83526412.multigene_bvrs.rds'
)
mnm_gene_tmp[[1]]$mvsusie_fitted$condition_names

# Main loop to process sliding windows
# For every window, list the multi-gene result files whose name matches the
# window, load each one, plot it when the target gene is among its condition
# names, and collect every successfully loaded result in `mnm_gene`.
mnm_gene <- list()
for (window in sliding_windows) {
  # fixed() matches the suffix literally; as a plain regex its dots would match
  # any character.
  context_files <- list.files('/data/analysis_result/multi_gene/ROSMAP/mnm_genes/', window, full.names = TRUE) %>%
    .[str_detect(., fixed('.multigene_bvrs.rds'))]

  for (context_file in context_files) {
    # Context label = file name up to the first dot.
    # NOTE(review): context_mnm is computed but not used below; it may have been
    # meant to distinguish the output files — confirm.
    context_mnm <- context_file %>% basename %>% str_split(., '[.]', simplify = TRUE) %>% .[, 1]

    # Load multi-gene data; report unreadable files instead of skipping them
    # silently, and carry on with the next file.
    mnm_gene_tmp <- tryCatch(
      readRDS(context_file),
      error = function(e) {
        message('Could not read ', context_file, ': ', conditionMessage(e))
        NULL
      }
    )
    if (is.null(mnm_gene_tmp)) {
      next
    }

    # Check if target gene is in the condition names
    if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp[[1]]$mvsusie_fitted$condition_names) {
      # Use a common prefix format for multi-gene plots. The directory follows
      # gene_name; it previously pointed at 'plots/JAZF1/', a different gene's
      # folder.
      # NOTE(review): every matching file gets the same prefix; if
      # plot_and_save() writes one file per prefix, later matches overwrite
      # earlier ones — confirm.
      plot_and_save(mnm_gene_tmp[[1]], file.path('plots', gene_name, 'sec6.multigene'))
    } else {
      message('There is mnm result for TAD window ', window, ' in ', context_file,
              ', but it does not include target gene ', gene_name, ' in CS.')
    }

    # Append to the results list
    mnm_gene <- append(mnm_gene, list(mnm_gene_tmp))
  }
}

# Enlarge the notebook plotting area for the faceted figure below.
options(repr.plot.width = 40, repr.plot.height = 40)

# Faceted plot with one row per cs_coverage_0.95 / study / region combination.
# It draws lines (fun_plot vs x) and points (pip vs pos) from `effect_of_interest`,
# an arrow and text label for the gene span, and red points from the "AD_" rows
# of `flatten_table`.
# `effect_of_interest` and `flatten_table` are not assigned in the visible code —
# TODO confirm where they are created.
# NOTE(review): `scale = "free_y"` presumably reaches facet_grid()'s `scales`
# argument through partial argument matching — confirm and spell it out.
 ggplot() + theme_bw() + facet_grid(cs_coverage_0.95 + study + region ~ ., labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)), scale = "free_y") +

      # This theme() and ylab() are set again further down the chain with different values.
      theme(text = element_text(size = 20), strip.text.y = element_text(size = 25, angle = 0.5)) +
     # xlim(view_win) +
      ylab("Estimated effect") +
   #   geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
    #            aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  geom_line(data = effect_of_interest ,
                aes_string(y = "fun_plot", x = "x", col = "cs_coverage_0.95"), size = 2) +  
    geom_point(data = effect_of_interest ,
                aes_string(y = "pip", x = "pos", col = "cs_coverage_0.95"), size = 10) +
    theme(text = element_text(size = 40), strip.text.y = element_text(size = 15, angle = 0.5), 
            axis.text.x = element_text(size = 40), axis.title.x = element_text(size = 40)) +
      xlab("Position") +
      ylab("Estimated\neffect") +
      # Gene span drawn at y = 1 as an arrow from gene_start to gene_end, with the gene name at its midpoint.
      # NOTE(review): `tar_gene_info` is not assigned in the visible code, which
      # assigns `target_gene_info` instead — confirm this is not a typo.
      geom_segment(arrow = arrow(length = unit(1, "cm")), aes(x = gene_start, xend = gene_end, y = 1, yend = 1), size = 6,
                  data = tar_gene_info$gene_info, alpha = 0.3) +
      geom_text(aes(x = (gene_start + gene_end) / 2, y = 1 , label = gene_name), size = 10, 
              data = tar_gene_info$gene_info)+
        # Red points: rows whose study contains "AD_" and whose cs_coverage_0.95 is non-zero.
        # The three facetting columns are dropped from this layer's data;
        # presumably so the points are repeated in every facet — confirm.
        geom_point(aes(x = pos, y = pip  ) ,color = "red", data = flatten_table%>%filter( str_detect(study,"AD_") , cs_coverage_0.95 != 0  )%>%mutate(AD_study = study%>%str_replace_all("_","\n" ))%>%select(-study,-region,-cs_coverage_0.95) )

# Fine-mapping contexts saved by section 1 (read once).
finempping_contexts <- readRDS(paste0(gene_name, '_finemapping_contexts.rds')) # from sec1

# Pass the contexts through the get_norosmap_contexts() helper.
finempping_contexts <- get_norosmap_contexts(finempping_contexts)

# ColocBoost plot with add_QTL = TRUE, using the contexts above as `cohorts`.
cb_contexts <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  cohorts = finempping_contexts,
  gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# Scatter of the interaction q-value (y) for each variant (x) in CTSH_int_res.
# `CTSH_int_res` is not assigned in the visible code — TODO confirm its origin.
# The title typo ("nalysis") is fixed and the x-axis label now names the
# plotted variable (variant_id) instead of "Gene Name".
ggplot(CTSH_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(title = "qvalue for CTSH csets in interaction association analysis",
       x = "Variant ID",
       y = "qvalue_interaction",
       size = "qvalue_interaction") +
  theme_minimal(base_size = 14) +
  # NOTE(review): legend.position = NULL leaves the legend setting unchanged;
  # if the intent was to hide the legend it should be "none" — confirm.
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey80"),
        legend.position = NULL,
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 1)
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))
# ggsave() is called without a plot argument, so it saves the last plot.
ggsave('plots/CTSH/sec11.interaction_association_CTSH_lessPIP25.pdf', height = 5, width = 8)

# Display `quant_coef_colocvar`. It is not assigned in the visible code —
# TODO confirm where it is created.
quant_coef_colocvar

# Display `vars_p`. Not assigned in the visible code — TODO confirm its origin.
vars_p

# Display `apoe_p`. Not assigned in the visible code — TODO confirm its origin.
apoe_p

options(repr.plot.width = 12, repr.plot.height = 6)

# Plot PIP per gene from `flat_var` when it is available; otherwise report that
# there is nothing to show.
# `flat_var` is not assigned in the visible code — TODO confirm its origin.
if (!is.null(flat_var)) {
  p <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
    geom_point(alpha = 0.7) +
    labs(title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
         x = "Gene Name",
         y = "PIP",
         size = "PIP",
         color = "CS Coverage 0.95 Min Corr") +
    theme_minimal(base_size = 14) +
    # NOTE(review): legend.position = NULL leaves the legend setting unchanged;
    # if the intent was to hide the legend it should be "none" — confirm.
    theme(panel.background = element_blank(),
          panel.grid.major = element_line(color = "grey80"),
          legend.position = NULL,
          axis.text.x = element_text(angle = 45, hjust = 1))
    # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

  # The output directory follows gene_name, like the file name already does,
  # instead of hard-coding the CTSH folder.
  ggsave(file.path('plots', gene_name, paste0('sec12.trans_fine_mapping_', gene_name, '.pdf')),
         p, height = 5, width = 8)
  p
} else {
  message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000103811	chr15	77880000	82520000	78949573	chr15:77703385-79213579,chr15:79213579-82094856,chr15:82094856-84973763	15_77703385-79213579,15_79213579-82094856,15_82094856-84973763	15_77703385-79213579,15_79213579_82094856,15_82094856_84973763	TADB_1142	chr15_76894683_83526412	78949574	78921058	chr15:71199029-78926846,chr15:73202704-83526412,chr15:74639085-85571216,chr15:76894683-86330334,chr15:81298552-90655467	CTSH

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
Ast; Exc; Inh; DLPFC; AC; PCC; Monocyte; pQTL; AC_unproductive; DLPFC_unproductive; PCC_unproductive; AD_Bellenguez_2022	1.0000000	2	0.5620890	5895; 5905	chr15:78942615:G:A; chr15:78944951:C:T	chr15:78942615:G:A	coloc_sets:Y2_Y4_Y5_Y6_Y7_Y8_Y9_Y10_Y11_Y13_Y15_Y16:CS3
DLPFC_unproductive; PCC_unproductive	0.9983280	2	0.6896885	5951; 5944	chr15:78953549:G:A; chr15:78953123:G:A	chr15:78953549:G:A	coloc_sets:Y13_Y15:CS1
Ast; DLPFC; AC; PCC	0.9622871	2	0.7819170	6045; 6044	chr15:78970799:T:TC; chr15:78970780:T:G	chr15:78970799:T:TC	coloc_sets:Y2_Y6_Y7_Y8:CS2

	variants	Ast	Exc	Inh	DLPFC	AC	PCC	Monocyte	pQTL	AC_unproductive	DLPFC_unproductive	PCC_unproductive	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
chr15:78942615:G:A	chr15:78942615:G:A	-6.627648	9.932729	3.223098	-10.16455	-13.73221	-8.476387	-14.05948	-16.81279	5.948535	7.039122	7.663185	-4.522059
chr15:78944951:C:T	chr15:78944951:C:T	-6.597645	9.963029	3.360427	-10.15700	-13.73079	-8.476387	-13.78699	-16.81279	5.954265	7.022584	7.663185	-4.664234

	variants	DLPFC_unproductive	PCC_unproductive
	<chr>	<dbl>	<dbl>
chr15:78953549:G:A	chr15:78953549:G:A	-7.997664	-8.791889
chr15:78953123:G:A	chr15:78953123:G:A	-7.931648	-8.737987

	variants	Ast	DLPFC	AC	PCC
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>
chr15:78970799:T:TC	chr15:78970799:T:TC	8.580473	12.09205	7.862659	6.873574
chr15:78970780:T:G	chr15:78970780:T:G	8.535104	12.07509	7.541040	6.812202

Case study: CTSH xQTL and AD GWAS

Overview

Computing environment setup

How to Use This Notebook

Section 0: Sanity check

Check the basic information of the gene

Check the existing results which are inputs to this analysis

Check the existing results which are inputs to this analysis

Section 1: Fine-mapping for xQTL and GWAS

Section 2: Multi-context colocalization with Bellenguez 2022

Section 3: Refinement of colocalized loci with other AD GWAS

Section 4: Assessment of multi-context xQTL effect sizes

Option 1: ColocBoost + MASH

Option 2: mvSuSiE

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)

TWAS results

MR results

cTWAS results

Section 6: Context specific multi-gene fine-mapping

A quick analysis: using the xQTL-AD summary table (flatten table)

A statistically solid approach: mvSuSiE multi-gene analysis

Section 7: Epigenomic QTL and their target regions

Generate a crude plot to determine whether the story is interesting

Section 8: Context focused validation in other xQTL data

Section 9: Non-linear effects of xQTL

APOE interaction

Quantile QTL analysis

Section 10: in silico functional studies in iPSC model

Section 11: Functional annotations of selected loci

Section 12: Candidate loci as trans-xQTL

Creative thinking: generate hypothesis, search in literature, raise questions to discuss

coloc_csets_1	coloc_csets_2	min_abs_cor	max_abs_cor	median_abs_cor
coloc_sets:Y2_Y4_Y5_Y6_Y7_Y8_Y9_Y10_Y11_Y13_Y15_Y16:CS3	coloc_sets:Y13_Y15:CS1	0.536318981888972	0.540882548155626	0.538561434994915
coloc_sets:Y2_Y4_Y5_Y6_Y7_Y8_Y9_Y10_Y11_Y13_Y15_Y16:CS3	coloc_sets:Y2_Y6_Y7_Y8:CS2	0.0245391224731722	0.0405268188681362	0.0326097212807046
coloc_sets:Y13_Y15:CS1	coloc_sets:Y2_Y6_Y7_Y8:CS2	0.180654233321276	0.190119170825354	0.185299420483055

A data.frame: 43 x 6
gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000041357	chr15	78540404	78540405	PSMA4	BM_22_MSBB_eQTL,Exc_mega_eQTL
ENSG00000058335	chr15	79090779	79090780	RASGRF1	MiGA_GTS_eQTL,DLPFC_Bennett_pQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,STARNET_eQTL
ENSG00000080644	chr15	78621294	78621295	CHRNA3	MiGA_GTS_eQTL,MiGA_SVZ_eQTL
ENSG00000103723	chr15	82709945	82709946	AP3B2	MiGA_GTS_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000103740	chr15	78245687	78245688	ACSBG1	MiGA_GTS_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000103876	chr15	80152489	80152490	FAH	MiGA_THA_eQTL,MSBB_BM36_pQTL,DLPFC_Bennett_pQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000103888	chr15	80779342	80779343	CEMIP	MiGA_SVZ_eQTL,Exc_DeJager_eQTL,DLPFC_DeJager_eQTL,Exc_Kellis_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000103942	chr15	82986152	82986153	HOMER2	BM_36_MSBB_eQTL
ENSG00000117899	chr15	80989827	80989828	MESD	ROSMAP_DLPFC_sQTL
ENSG00000117906	chr15	76931737	76931738	RCN2	BM_22_MSBB_eQTL
ENSG00000136371	chr15	79897378	79897379	MTHFS	Exc_mega_eQTL,STARNET_eQTL
ENSG00000136378	chr15	78811463	78811464	ADAMTS7	ROSMAP_AC_sQTL
ENSG00000136379	chr15	80679683	80679684	ABHD17C	MiGA_SVZ_eQTL,Ast_DeJager_eQTL,DLPFC_DeJager_eQTL,AC_DeJager_eQTL,Ast_mega_eQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000136381	chr15	78437430	78437431	IREB2	MiGA_SVZ_eQTL,MiGA_THA_eQTL,ROSMAP_PCC_sQTL,STARNET_eQTL
ENSG00000140379	chr15	79971195	79971196	BCL2A1	MiGA_GTS_eQTL
ENSG00000140395	chr15	78299635	78299636	WDR61	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000140403	chr15	78264085	78264086	DNAJA4	MiGA_GTS_eQTL,BM_36_MSBB_eQTL,Inh_mega_eQTL,ROSMAP_AC_sQTL,STARNET_eQTL
ENSG00000140406	chr15	81000922	81000923	TLNRD1	BM_22_MSBB_eQTL
ENSG00000140598	chr15	82262733	82262734	EFL1	ROSMAP_PCC_sQTL
ENSG00000156206	chr15	81007032	81007033	CFAP161	MiGA_GTS_eQTL
ENSG00000156218	chr15	83654087	83654088	ADAMTSL3	ROSMAP_PCC_sQTL
ENSG00000166411	chr15	78131497	78131498	IDH3A	Knight_eQTL,MiGA_SVZ_eQTL,MiGA_THA_eQTL,ROSMAP_PCC_sQTL
ENSG00000166426	chr15	78340352	78340353	CRABP1	MiGA_THA_eQTL,BM_36_MSBB_eQTL
ENSG00000167202	chr15	78077723	78077724	TBC1D2B	MiGA_SVZ_eQTL,Exc_Kellis_eQTL,ROSMAP_PCC_sQTL
ENSG00000169609	chr15	83011640	83011641	C15orf40	ROSMAP_AC_sQTL
ENSG00000169684	chr15	78565519	78565520	CHRNA5	MiGA_GTS_eQTL,MiGA_THA_eQTL,ROSMAP_PCC_sQTL
ENSG00000172345	chr15	81324182	81324183	STARD5	MiGA_SVZ_eQTL,MiGA_THA_eQTL,DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL
ENSG00000172349	chr15	81159574	81159575	IL16	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000172379	chr15	80404381	80404382	ARNT2	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000173517	chr15	77420143	77420144	PEAK1	MiGA_SVZ_eQTL
ENSG00000180953	chr15	79923701	79923702	ST20	BM_44_MSBB_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,STARNET_eQTL
ENSG00000182774	chr15	82540458	82540459	RPS17	MiGA_GTS_eQTL,PCC_DeJager_eQTL
ENSG00000183476	chr15	78077807	78077808	SH2D7	MiGA_GTS_eQTL
ENSG00000183496	chr15	82046118	82046119	MEX3B	MiGA_THA_eQTL
ENSG00000185787	chr15	78810486	78810487	MORF4L1	MSBB_BM36_pQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,STARNET_eQTL
ENSG00000186628	chr15	82806069	82806070	FSD2	MiGA_SVZ_eQTL
ENSG00000188266	chr15	78507563	78507564	HYKK	MiGA_THA_eQTL
ENSG00000188659	chr15	82262809	82262810	SAXO2	Knight_eQTL,MiGA_GTS_eQTL,MiGA_SVZ_eQTL,MiGA_THA_eQTL,BM_10_MSBB_eQTL,BM_22_MSBB_eQTL,BM_36_MSBB_eQTL,BM_44_MSBB_eQTL,Oli_DeJager_eQTL,Exc_DeJager_eQTL,Inh_DeJager_eQTL,DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL,Oli_Kellis_eQTL,monocyte_ROSMAP_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000197978	chr15	82430019	82430020	GOLGA6L9	MiGA_GTS_eQTL,DLPFC_DeJager_eQTL,Exc_Kellis_eQTL,ROSMAP_AC_sQTL