# Install the `r-pecotmr` and `r-bedmatrix` packages into the `r_libs` micromamba environment.
# NOTE(review): these two lines are shell commands, not R; run them in a terminal
# or a shell cell rather than through the R interpreter.
micromamba install -n r_libs r-pecotmr
micromamba install -n r_libs r-bedmatrix

# Load the plotting and utility helpers used by the rest of this notebook.
source('../../codes/cb_plot.R')
source('../../codes/utilis.R')

# Source every R script from the local colocboost checkout.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored at the end of the name; the previous ".R" also matched files such
# as "notes.Rmd" or "cache.RData", which must not be passed to source().
for (file in list.files("/data/colocalization/colocboost/R", pattern = "\\.R$", full.names = TRUE)) {
  source(file)
}
# Gene analysed in this notebook; used below to build output paths.
gene_name <- 'JAZF1'

# Output directory for the figures. showWarnings = FALSE keeps re-runs quiet
# when the directory already exists.
dir.create(paste0('plots/', gene_name), recursive = TRUE, showWarnings = FALSE)

# Look up the gene annotation and display it.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Region identifier and chromosome of the gene, reused to build result-file names.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# Re-load the utility helpers (the same file is already sourced near the top of
# the notebook), then call the bulk-expression helper for the target gene.
source('../../codes/utilis.R')
expression_in_rosmap_bulk(target_gene_info)

# Display `region_p`.
# NOTE(review): `region_p` is not defined in the visible part of this file;
# presumably it is created by an earlier cell or a sourced helper — confirm.
region_p

# Display `pip_p` (same caveat: its definition is not visible here).
pip_p

# Load the ColocBoost result stored for this gene's region id.
cb_res <- readRDS(paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds"))

# Summarise the colocboost results and save the summary next to the notebook.
cb_res_table <- get_cb_summary(cb_res)
saveRDS(cb_res_table, paste0(gene_name, "_colocboost_res.rds"))

cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Write the recorded plot into the per-gene output directory. The directory is
# derived from `gene_name` (instead of a hard-coded 'plots/JAZF1/') so that it
# always matches the directory created from `gene_name` earlier.
pdf(paste0('plots/', gene_name, '/sec2.colocboost_res.pdf'), width = 10, height = 5)
replayPlot(cb$p)
dev.off()

# colocalized variants
cb_res_table

# effect sign for each coloc sets
get_effect_sign_csets(cb_res)

# LD between coloc sets
get_between_purity_simple(cb_res, gene.name = gene_id, path = '/data/colocalization/QTL_data/eQTL/')

# AD GWAS cohorts passed to plot_cb() through its `cohorts` argument.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)

# Same colocboost plot as before, now requesting the GWAS cohorts listed above.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Write the recorded all-AD-cohort plot to the per-gene output directory. The
# directory is built from `gene_name` rather than hard-coded, so it stays in
# sync with the directory created from `gene_name`.
pdf(paste0('plots/', gene_name, '/sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
replayPlot(cb_ad$p)
dev.off()

# Read one multi-context result file.
# Returns the deserialised object, or NULL after reporting the failure (with
# the underlying reason) when the file cannot be read.
load_multi_context <- function(path, cohort) {
  tryCatch(
    readRDS(path),
    error = function(e) {
      message('Error in loading ', cohort, ' multi context data: ', conditionMessage(e))
      NULL
    }
  )
}

# Plot and save the first element of a multi-context result when it carries an
# `mvsusie_fitted` entry; otherwise report that the result is empty.
# A NULL or zero-length `res` is treated as empty instead of failing on `[[1]]`.
report_multi_context <- function(res, cohort, prefix) {
  first_res <- if (length(res) > 0) res[[1]] else NULL
  if (!is.null(first_res$mvsusie_fitted)) {
    plot_and_save(first_res, prefix)
  } else {
    message('Multi Context results are empty in ', cohort, ' data')
  }
}

# Load and process ROSMAP data
message("Multi context in ROSMAP data")

multi_context_rosmap_tmp <- load_multi_context(
  paste0('/data/analysis_result/multi_context/ROSMAP/mnm/ROSMAP_DeJager.',
         target_gene_info$gene_info$`#chr`, '_', gene_id, '.multicontext_bvsr.rds'),
  cohort = 'ROSMAP'
)
report_multi_context(multi_context_rosmap_tmp, 'ROSMAP',
                     paste0('plots/', gene_name, '/sec4.multi_context_ROSMAP'))

# Load and process MSBB data
message("Multi context in MSBB data")

multi_context_msbb_tmp <- load_multi_context(
  paste0('/data/analysis_result/multi_context/MSBB/mnm/MSBB_eQTL.',
         target_gene_info$gene_info$`#chr`, '_', gene_id, '.multicontext_bvsr.rds'),
  cohort = 'MSBB'
)
report_multi_context(multi_context_msbb_tmp, 'MSBB',
                     paste0('plots/', gene_name, '/sec4.multi_context_MSBB'))

# TWAS results for the target gene.
plot_TWAS_res(gene_id = gene_id, gene_name = gene_name)

# Flattened multi-gene / multi-context table. The input file name is built from
# `gene_name` instead of repeating the gene symbol as a literal.
multigene_flat <- get_multigene_multicontext_flatten(
  paste0('Fungen_xQTL_allQTL.overlapped.gwas.export.', gene_name, '.rds'),
  sQTL = 'no_MSBB'
)
multigene_flat

# The TADB_id field is a comma-separated string; split it into one window id
# per element (fixed = TRUE: the separator is a literal comma, not a regex).
sliding_windows <- as.character(unlist(strsplit(target_gene_info$gene_info$TADB_id, ',', fixed = TRUE)))
sliding_windows

# Multi-gene result (ROSMAP Mic eQTL) for window chr7_24118656_29243917.
mnm_gene_tmp <- readRDS(
  file = '/data/analysis_result/multi_gene/ROSMAP/mnm_genes//ROSMAP_Mic_DeJager_eQTL.chr7_24118656_29243917.multigene_bvrs.rds'
)

# Condition names stored with the fitted model of the first element.
mnm_gene_tmp[[1]]$mvsusie_fitted$condition_names

ENSG00000105928: Chromosome 7: 24,698,355-24,757,940 reverse strand.
ENSG00000070882: Chromosome 7: 24,796,540-24,981,634 reverse strand.
JAZF1: Chromosome 7: 27,830,573-28,180,795 reverse strand.
CPVL: Chromosome 7: 28,995,235-29,195,451 reverse strand.

# Multi-gene result (ROSMAP Mic eQTL) for window chr7_26830573_29825894.
mnm_gene_tmp <- readRDS(
  file = '/data/analysis_result/multi_gene/ROSMAP/mnm_genes//ROSMAP_Mic_DeJager_eQTL.chr7_26830573_29825894.multigene_bvrs.rds'
)

# Condition names stored with the fitted model of the first element.
mnm_gene_tmp[[1]]$mvsusie_fitted$condition_names

NULL

# Multi-gene result (ROSMAP Mic eQTL) for window chr7_27953358_31892081.
mnm_gene_tmp <- readRDS(
  file = '/data/analysis_result/multi_gene/ROSMAP/mnm_genes//ROSMAP_Mic_DeJager_eQTL.chr7_27953358_31892081.multigene_bvrs.rds'
)

# Condition names stored with the fitted model of the first element.
mnm_gene_tmp[[1]]$mvsusie_fitted$condition_names

NULL

# Main loop to process sliding windows.
# For every window, read each multi-gene result file found for it, plot the
# result when the target gene is among the fitted conditions, and collect every
# successfully loaded result in `mnm_gene`.
mnm_gene <- list()
for (window in sliding_windows) {
  # fixed(): the suffix is a literal string; as a regex its dots would match
  # any character.
  context_files <- list.files('/data/analysis_result/multi_gene/ROSMAP/mnm_genes/', window, full.names = TRUE) %>%
    .[str_detect(., fixed('.multigene_bvrs.rds'))]

  for (context_file in context_files) {
    # Context label = file name up to the first dot.
    context_mnm <- context_file %>% basename %>% str_split(., '[.]', simplify = TRUE) %>% .[, 1]

    # Load multi-gene data; an unreadable file is reported and skipped rather
    # than silently ignored.
    mnm_gene_tmp <- tryCatch(
      readRDS(context_file),
      error = function(e) {
        message('Could not read ', context_file, ': ', conditionMessage(e))
        NULL
      }
    )
    if (is.null(mnm_gene_tmp)) {
      next
    }

    # Check if target gene is in the condition names
    if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp[[1]]$mvsusie_fitted$condition_names) {
      # Use a common prefix format for multi-gene plots.
      # NOTE(review): the same prefix is passed for every window/context; if
      # plot_and_save() derives the file name from the prefix alone, later
      # results overwrite earlier ones — confirm.
      plot_and_save(mnm_gene_tmp[[1]], paste0('plots/', gene_name, '/sec6.multigene'))
    } else {
      message('There is mnm result for TAD window ', window, ' in ', context_mnm,
              ', but it does not include target gene ', gene_name, ' in CS.')
    }

    # Append to the results list
    mnm_gene <- append(mnm_gene, list(mnm_gene_tmp))
  }
}

# Enlarge the notebook display area for the tall faceted figure.
options(repr.plot.width = 40, repr.plot.height = 40)

# Faceted figure: one row per cs_coverage_0.95 / study / region combination,
# showing effect curves and PIP points from `effect_of_interest`, a gene arrow
# with its label at y = 1, and (in red) the points of `flatten_table` rows
# whose study matches "AD_" and whose cs_coverage_0.95 is non-zero.
#
# Changes from the previous version: `scales` is spelled out (it was reached
# through partial argument matching as `scale`), and an earlier theme() /
# ylab() pair that was completely overridden further down the chain is removed.
#
# NOTE(review): `effect_of_interest`, `flatten_table` and `tar_gene_info` are
# not defined in the visible part of this file. The gene annotation object is
# called `target_gene_info` elsewhere — confirm `tar_gene_info` is not a stale
# name.
ggplot() +
  theme_bw() +
  facet_grid(
    cs_coverage_0.95 + study + region ~ .,
    # Break long row labels at "_", ":", "," and "-".
    labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
    scales = "free_y"
  ) +
  # xlim(view_win) +
  # geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
  #           aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  geom_line(
    data = effect_of_interest,
    aes_string(y = "fun_plot", x = "x", col = "cs_coverage_0.95"),
    size = 2
  ) +
  geom_point(
    data = effect_of_interest,
    aes_string(y = "pip", x = "pos", col = "cs_coverage_0.95"),
    size = 10
  ) +
  theme(
    text = element_text(size = 40),
    strip.text.y = element_text(size = 15, angle = 0.5),
    axis.text.x = element_text(size = 40),
    axis.title.x = element_text(size = 40)
  ) +
  xlab("Position") +
  ylab("Estimated\neffect") +
  geom_segment(
    arrow = arrow(length = unit(1, "cm")),
    aes(x = gene_start, xend = gene_end, y = 1, yend = 1),
    size = 6,
    data = tar_gene_info$gene_info,
    alpha = 0.3
  ) +
  geom_text(
    aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name),
    size = 10,
    data = tar_gene_info$gene_info
  ) +
  geom_point(
    aes(x = pos, y = pip),
    color = "red",
    # The faceting columns are dropped from this layer's data.
    data = flatten_table %>%
      filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
      mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
      select(-study, -region, -cs_coverage_0.95)
  )

# Read the fine-mapping contexts saved in sec1 and pass them through
# get_norosmap_contexts() before plotting.
finempping_contexts <- paste0(gene_name, '_finemapping_contexts.rds') %>%
  readRDS() %>%
  get_norosmap_contexts()

# Colocboost plot with the QTL contexts added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  cohorts = finempping_contexts,
  gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# Interaction q-value per variant, with the y axis limited to [0, 1].
# Fixes: title typo ("nalysis"), an x-axis label that said "Gene Name" although
# the axis maps `variant_id`, and a `size` label for an aesthetic that is not
# mapped in this plot.
# NOTE(review): `JAZF1_int_res` is not defined in the visible part of this file.
ggplot(JAZF1_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(title = "qvalue for JAZF1 csets in interaction association analysis",
       x = "Variant ID",
       y = "qvalue_interaction") +
  theme_minimal(base_size = 14) +
  # NOTE(review): `legend.position = NULL` does not hide a legend; use "none"
  # if that was the intent.
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey80"),
        legend.position = NULL,
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 1)
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))
ggsave('plots/JAZF1/sec11.interaction_association_JAZF1_lessPIP25.pdf', height = 5, width = 8)

# Display three objects in turn.
# NOTE(review): `vars_p`, `apoe_p` and `func_p` are not defined in the visible
# part of this file; presumably they are created by earlier cells or sourced
# helpers — confirm.
vars_p

apoe_p

func_p

options(repr.plot.width = 12, repr.plot.height = 6)

# PIP per gene for the trans fine-mapping results, saved as a PDF when
# `flat_var` is available; otherwise a message is emitted.
# Changes: the plot is stored and handed to ggsave() explicitly instead of
# relying on the implicit last_plot() inside the `if` body, the output
# directory is built from `gene_name`, and the label for the unmapped `color`
# aesthetic is removed.
# NOTE(review): `flat_var` is not defined in the visible part of this file.
if (!is.null(flat_var)) {
  trans_p <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
    geom_point(alpha = 0.7) +
    labs(title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
         x = "Gene Name",
         y = "PIP",
         size = "PIP") +
    theme_minimal(base_size = 14) +
    # NOTE(review): `legend.position = NULL` does not hide the size legend; use
    # "none" if that was the intent.
    theme(panel.background = element_blank(),
          panel.grid.major = element_line(color = "grey80"),
          legend.position = NULL,
          axis.text.x = element_text(angle = 45, hjust = 1))
    # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))
  ggsave(paste0('plots/', gene_name, '/sec12.trans_fine_mapping_', gene_name, '.pdf'),
         plot = trans_p, height = 5, width = 8)
} else {
  message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000153814	chr7	25120000	29180795	28180794	chr7:23432442-25641844,chr7:25641844-28408975,chr7:28408975-30967102	7_23432442-25641844,7_25641844-28408975,7_28408975-30967102	7_23432442-25641844,7_25641844_28408975,7_28408975_30967102	TADB_620,TADB_621,TADB_622	chr7_24118656_29243917,chr7_26830573_29825894,chr7_27953358_31892081	28180795	27830573	chr7:21118236-26180356,chr7:21419444-29243917,chr7:22480752-29825894,chr7:24118656-31892081,chr7:26830573-33761787,chr7:27953358-36697459	JAZF1

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
AC_unproductive; DLPFC_unproductive; PCC_unproductive	1.0000000	1	1.0000000	12479	chr7:27913416:A:G	chr7:27913416:A:G	coloc_sets:Y12_Y13_Y15:CS1
Mic; OPC; AC; AD_Bellenguez_2022	0.9808940	9	0.1481922	13387; 13318; 13315; 13383; 13313; 13389; 13368; 13363; 13365	chr7:28138643:G:T; chr7:28116987:C:G; chr7:28117268:C:T; chr7:28118439:C:T; chr7:28136686:G:A; chr7:28137682:C:T; chr7:28130784:T:C; chr7:28132564:A:G; chr7:28132395:C:T	chr7:28116987:C:G	coloc_sets:Y1_Y4_Y8_Y16:CS2
Ast; Exc	0.6362281	20	0.3212093	13576; 13545; 13546; 13245; 13433; 13397; 13452; 13458; 13456; 13443; 13459; 13470; 13472; 13413; 13416; 13388; 13667; 13390; 13529; 13350	chr7:28102567:G:T; chr7:28192838:A:G; chr7:28149792:T:C; chr7:28152661:C:T; chr7:28156603:A:G; chr7:28154778:C:A; chr7:28184371:T:C; chr7:28184434:A:G; chr7:28216621:G:A; chr7:28140937:T:C; chr7:28180193:A:G; chr7:28156794:G:A; chr7:28159058:C:T; chr7:28160478:C:T; chr7:28147156:A:AT; chr7:28123055:C:T; chr7:28147492:A:ATTTT; chr7:28151111:ATTTTTT:ATTT; chr7:28139006:TG:T; chr7:28137719:G:A	chr7:28184371:T:C	coloc_sets:Y2_Y5:MergeCS1

	variants	AC_unproductive	DLPFC_unproductive	PCC_unproductive
	<chr>	<dbl>	<dbl>	<dbl>
chr7:27913416:A:G	chr7:27913416:A:G	32.12217	9.719255	26.30224

	variants	Mic	OPC	AC	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>
chr7:28137682:C:T	chr7:28137682:C:T	28.19425	-9.486261	7.608747	-5.610000
chr7:28118439:C:T	chr7:28118439:C:T	28.19425	-9.486261	7.608747	-5.590000
chr7:28117268:C:T	chr7:28117268:C:T	28.19425	-9.486261	7.608747	-5.580000
chr7:28136686:G:A	chr7:28136686:G:A	28.19425	-9.486261	7.608747	-5.570000
chr7:28116987:C:G	chr7:28116987:C:G	28.19425	-9.486261	7.608747	-5.570000
chr7:28138643:G:T	chr7:28138643:G:T	27.69204	-9.351093	7.777642	-5.405941
chr7:28132564:A:G	chr7:28132564:A:G	27.70500	-9.407559	7.705515	-5.575758
chr7:28130784:T:C	chr7:28130784:T:C	27.70500	-9.407559	7.705515	-5.500000
chr7:28132395:C:T	chr7:28132395:C:T	27.68988	-9.339620	7.776547	-5.525253

	variants	Ast	Exc
	<chr>	<dbl>	<dbl>
chr7:28192838:A:G	chr7:28192838:A:G	6.958533	10.503492
chr7:28184371:T:C	chr7:28184371:T:C	6.857482	10.586918
chr7:28184434:A:G	chr7:28184434:A:G	6.857482	10.586918
chr7:28102567:G:T	chr7:28102567:G:T	-7.238666	-9.430452
chr7:28149792:T:C	chr7:28149792:T:C	6.824790	8.712003
chr7:28140937:T:C	chr7:28140937:T:C	6.733778	8.852091
chr7:28152661:C:T	chr7:28152661:C:T	6.798266	8.724166
chr7:28156603:A:G	chr7:28156603:A:G	6.798266	8.724166
chr7:28154778:C:A	chr7:28154778:C:A	6.779118	8.704865
chr7:28151111:ATTTTTT:ATTT	chr7:28151111:ATTTTTT:ATTT	6.561550	8.907106
chr7:28156794:G:A	chr7:28156794:G:A	6.634914	8.919351
chr7:28159058:C:T	chr7:28159058:C:T	6.634914	8.919351
chr7:28160478:C:T	chr7:28160478:C:T	6.634914	8.919351
chr7:28147156:A:AT	chr7:28147156:A:AT	6.617439	8.944349
chr7:28147492:A:ATTTT	chr7:28147492:A:ATTTT	6.451539	8.936288
chr7:28137719:G:A	chr7:28137719:G:A	6.469557	8.869312
chr7:28216621:G:A	chr7:28216621:G:A	6.784733	9.530695
chr7:28139006:TG:T	chr7:28139006:TG:T	6.377519	8.871913
chr7:28180193:A:G	chr7:28180193:A:G	6.775521	9.370325
chr7:28123055:C:T	chr7:28123055:C:T	5.965627	8.839078

Case study: JAZF1 xQTL and AD GWAS¶

Overview¶

Computing environment setup¶

How to Use This Notebook¶

Section 0: Sanity check ¶

Check the basic information of the gene¶

Check the existing results which are inputs to this analysis¶

Section 1: Fine-mapping for xQTL and GWAS ¶

Section 2: Multi-context colocalization with Bellenguez 2022 ¶

Section 3: Refinement of colocalized loci with other AD GWAS ¶

Section 4: Assessment of multi-context xQTL effect sizes ¶

Option 1: ColocBoost + MASH¶

Option 2: mvSuSiE¶

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)¶

TWAS results¶

MR results¶

cTWAS results¶

Section 6: Context specific multi-gene fine-mapping ¶

A quick analysis: using the xQTL-AD summary table (flatten table)¶

A statistically solid approach: mvSuSiE multi-gene analysis¶

Section 7: Epigenomic QTL and their target regions ¶

Generate a crude plot to determine whether the story is interesting¶

Section 8: Context focused validation in other xQTL data ¶

Section 9: Non-linear effects of xQTL ¶

APOE interaction¶

Quantile QTL¶

Section 10: in silico functional studies in iPSC model ¶

Section 11: Functional annotations of selected loci ¶

Section 12: Candidate loci as trans-xQTL ¶

Creative thinking: generate hypothesis, search in literature, raise questions to discuss¶

coloc_csets_1	coloc_csets_2	min_abs_cor	max_abs_cor	median_abs_cor
coloc_sets:Y12_Y13_Y15:CS1	coloc_sets:Y1_Y4_Y8_Y16:CS2	0.0593781803562436	0.0685072279657154	0.0615616200035275
coloc_sets:Y12_Y13_Y15:CS1	coloc_sets:Y2_Y5:MergeCS1	0.0492489438940506	0.101409390070326	0.0897810753206947
coloc_sets:Y1_Y4_Y8_Y16:CS2	coloc_sets:Y2_Y5:MergeCS1	0.528714321117617	0.718508216714854	0.554143837787159

A data.frame: 23 x 6
gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000005020	chr7	26995238	26995239	SKAP2	Inh_mega_eQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000050344	chr7	26152197	26152198	NFE2L3	MiGA_THA_eQTL
ENSG00000078399	chr7	27175179	27175180	HOXA9	MiGA_GTS_eQTL
ENSG00000086300	chr7	26291861	26291862	SNX10	MiGA_SVZ_eQTL,BM_10_MSBB_eQTL,ROSMAP_AC_sQTL
ENSG00000105928	chr7	24757939	24757940	GSDME	MiGA_GFM_eQTL,MiGA_GTS_eQTL
ENSG00000105991	chr7	27095999	27096000	HOXA1	MiGA_SVZ_eQTL
ENSG00000106031	chr7	27200090	27200091	HOXA13	MiGA_SVZ_eQTL
ENSG00000106049	chr7	27662882	27662883	HIBADH	MiGA_GTS_eQTL,MiGA_SVZ_eQTL,Oli_DeJager_eQTL,AC_DeJager_eQTL,Oli_Kellis_eQTL,Exc_mega_eQTL,Oli_mega_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000106052	chr7	27739330	27739331	TAX1BP1	MiGA_SVZ_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000106066	chr7	29195450	29195451	CPVL	DLPFC_DeJager_eQTL,AC_DeJager_eQTL,DLPFC_Bennett_pQTL,DLPFC_Klein_gpQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000106069	chr7	29146568	29146569	CHN2	MiGA_GTS_eQTL
ENSG00000106086	chr7	30027403	30027404	PLEKHA8	STARNET_eQTL
ENSG00000122565	chr7	26201161	26201162	CBX3	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,STARNET_eQTL
ENSG00000122566	chr7	26201480	26201481	HNRNPA2B1	MiGA_GTS_eQTL,ROSMAP_AC_sQTL
ENSG00000122592	chr7	27157935	27157936	HOXA7	MiGA_GTS_eQTL,MiGA_THA_eQTL
ENSG00000136193	chr7	29990288	29990289	SCRN1	MSBB_BM36_pQTL,ROSMAP_PCC_sQTL
ENSG00000146592	chr7	28299320	28299321	CREB5	MiGA_SVZ_eQTL,MiGA_THA_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000153790	chr7	25180355	25180356	C7orf31	MiGA_SVZ_eQTL
ENSG00000172115	chr7	25125259	25125260	CYCS	MiGA_SVZ_eQTL
ENSG00000176532	chr7	29563834	29563835	PRR15	MiGA_SVZ_eQTL
ENSG00000255690	chr7	28958329	28958330	TRIL	MiGA_GTS_eQTL,BM_22_MSBB_eQTL
ENSG00000281039	chr7	30550760	30550761	AC005154.5	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000285162	chr7	29122339	29122340	AC004593.2	MiGA_SVZ_eQTL