micromamba install -n r_libs r-pecotmr

# If an error occurs while sourcing these scripts, it may be because a get()
# call returned NULL. Restart the kernel (or click the R kernel indicator in
# the upper right corner) and run this cell again.
source('../../codes/cb_plot.R')
source('../../codes/utilis.R')
source("../../codes/qtl_utils.R", chdir = TRUE)

# Source every ColocBoost R script. `pattern` is a regular expression, so the
# dot is escaped and the match is anchored to the end of the name. The former
# pattern ".R" matched any character followed by "R" anywhere in the file name
# (e.g. "notes.Rmd", "cache.RData", "README").
for (file in list.files("/data/colocalization/colocboost/R", pattern = "\\.R$", full.names = TRUE)) {
    source(file)
}
# Gene analysed throughout this notebook.
gene_name <- 'BIN1'

# Per-gene folder that receives the figures written further down.
dir.create(paste0('plots/', gene_name), recursive = TRUE)

# Look up the gene record and display it.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Identifiers taken from the gene record and reused by later cells.
gene_id <- target_gene_info$gene_info[['region_id']]
chrom <- target_gene_info$gene_info[['#chr']]

expression_in_rosmap_bulk(target_gene_info)

# Display `region_p`. The object is not created in the visible code.
# NOTE(review): presumably a plot produced by an earlier step — confirm.
region_p

# Display `pip_p`; likewise not created in the visible code.
pip_p

# Load the ADxQTL summary table.
xqtlad <- fread('/data/interactive_analysis/adpelle1/xqtl-paper/landscape_analysis/res_gwas_qtl_anno_ADGWAS_xQTL_noMSBBsQTL_filtered.csv.gz')
# To inspect the rows for the current gene: xqtlad[gene_name == gname]
# Also load the corresponding flattened table annotated with locus_context_id.
resf <- fread('/data/interactive_analysis/adpelle1/xqtl-paper/landscape_analysis/Fungen_xQTL_allQTL.overlapped.gwas.export.snATAC.ADGWAS_fix.addMetabrain.locuscontextid_ADcs95corrCs95Cs70corr_xQTLcs95corrCs70corrSNP0.2.csv.gz')

# Keep the gene symbol under a second name. Inside `xqtlad[...]` the name
# `gene_name` presumably resolves to a column of the table, so the filter
# needs a differently named variable to compare against.
gname <- gene_name

# Figure height grows with the number of rows for this gene. It is computed
# once and floored at 1, because round(n / 3) is 0 for n <= 1 and a
# zero-height figure cannot be saved.
track_height <- max(1, round(nrow(xqtlad[gene_name == gname]) / 3))

options(repr.plot.width = 12, repr.plot.height = track_height)

GeneTrackQTL(gene = gname, ADxQTL_summ = xqtlad, flatten_table = resf)

# File names follow the gene symbol instead of a hard-coded 'BIN1'.
# No plot is passed, so ggsave() writes ggplot2's last plot.
ggsave(paste0(gname, '_track.pdf'), width = 12, height = track_height)
ggsave(paste0(gname, '_track.png'), width = 12, height = track_height)

# Load the ColocBoost result for this gene.
cb_res <- readRDS(paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds"))

# Summarise the ColocBoost result and save the summary next to the notebook.
cb_res_table <- get_cb_summary(cb_res)
saveRDS(cb_res_table, paste0(gene_name, "_colocboost_res.rds"))

cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Replay the recorded plot into a PDF inside the per-gene folder (same folder
# that dir.create() builds from `gene_name`). `finally` closes the device even
# when replayPlot() fails; otherwise the PDF device would stay open and capture
# every later plot of the session.
pdf(file.path('plots', gene_name, 'sec2.colocboost_res.pdf'), width = 10, height = 2 * cb_plot_row(cb_res))
invisible(tryCatch(replayPlot(cb$p), finally = dev.off()))

# Colocalized variants
cb_res_table

# Effect sign for each coloc set
get_effect_sign_csets(cb_res)

# LD between coloc sets: only computed when the summary has more than one row
if (nrow(cb_res_table) <= 1) {
    message('No more than 1 Cos, No need to compute between LD')
} else {
    get_between_purity_simple(
        cb_res,
        gene.name = gene_id,
        path = '/data/colocalization/QTL_data/eQTL/'
    )
}

# Check whether the original ColocBoost summary already contains an AD coloc
# set: split the first summary column on '; ', strip spaces and look for the
# 'AD_Bellenguez_2022' label. any(..., na.rm = TRUE) keeps the result a plain
# TRUE/FALSE even if a label is missing; the former `sum(...) > 0` would turn
# into NA and make the `if` below fail.
AD_in_coloc <- get_cb_summary(cb_res)[, 1] %>%
    str_split('; ') %>%
    unlist() %>%
    gsub(' ', '', .) %>%
    str_detect('AD_Bellenguez_2022') %>%
    any(na.rm = TRUE)

# The marginal projection is only needed when no AD coloc set is present.
run_marginal <- !AD_in_coloc

if (run_marginal) {
    cb_marginal <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2, fake_contexts = c('AD_Bellenguez_2022'), fake_targets = get_most_popular_pheno(cb_res))
    pdf(file.path('plots', gene_name, 'sec2.colocboost_res_marginal.pdf'), width = 10, height = 2 * cb_plot_row(cb_marginal$cb_res))
    # `finally` closes the PDF device even when replayPlot() fails.
    invisible(tryCatch(replayPlot(cb_marginal$p), finally = dev.off()))
} else {
    message('Original coloc results already include AD coloc set.\nTemporarily skip processing fake AD csets, or set `run_marginal = TRUE` before running this cell.')
}

# example only shown in BIN1 case 
# run_marginal = T
# if(run_marginal){ 
#     cb_marginal <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2, fake_contexts = c('AD_Bellenguez_2022'), fake_targets = get_most_popular_pheno(cb_res))
#     pdf('plots/BIN1/sec2.colocboost_res_marginal.pdf', width = 10, height = 2 * cb_plot_row(cb_marginal$cb_res))
#     replayPlot(cb_marginal$p)
#     dev.off()
# } else {
#     message('Original coloc results already include AD coloc set.\nTemporarily skip processing fake AD csets, or set `run_marginal = TRUE` before running this cell.')
# }

# AD GWAS cohorts handed to plot_cb() through `cohorts` (with add_gwas = TRUE).
AD_cohorts <- c(
    'AD_Jansen_2021',
    'AD_Bellenguez_EADB_2022',
    'AD_Bellenguez_EADI_2022',
    'AD_Kunkle_Stage1_2019',
    'AD_Wightman_Excluding23andMe_2021',
    'AD_Wightman_ExcludingUKBand23andME_2021',
    'AD_Wightman_Full_2021'
)
cb_ad <- plot_cb(
    cb_res = cb_res,
    cex.pheno = 1.5,
    x.phen = -0.2,
    add_gwas = TRUE,
    gene_id = gene_id,
    cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Write the plot that adds the other AD GWAS cohorts to a PDF in the per-gene
# folder. `finally` closes the device even when replayPlot() fails, so a failed
# replay cannot leave the PDF device open for the rest of the session.
pdf(file.path('plots', gene_name, 'sec3.colocboost_res_allad.pdf'), width = 10, height = 2 * cb_plot_row(cb_ad$cb_res))
invisible(tryCatch(replayPlot(cb_ad$p), finally = dev.off()))

# Same plot for the marginal (projected) result; `cb_marginal` only exists
# when `run_marginal` was TRUE in the marginal-projection cell.
if (run_marginal) {
    cb_marginal_ad <- plot_cb(cb_res = cb_marginal$cb_res, cex.pheno = 1.5, x.phen = -0.2, add_gwas = TRUE, gene_id = gene_id, cohorts = AD_cohorts)

    pdf(file.path('plots', gene_name, 'sec3.colocboost_res_marginal_allad.pdf'), width = 10, height = 2 * cb_plot_row(cb_marginal_ad$cb_res))
    invisible(tryCatch(replayPlot(cb_marginal_ad$p), finally = dev.off()))
} else {
    message('Original coloc results already include AD coloc set.\nTemporarily skip processing fake AD csets, or set `run_marginal = TRUE` before running this cell.')
}

# Source the shared helpers again (this file is also sourced at the top).
source('../../codes/utilis.R')

# Use the notebook-wide gene symbol instead of a hard-coded 'BIN1'.
mash_p <- mash_plot(gene_name = gene_name)

options(repr.plot.width = 10, repr.plot.height = 10)

# Display and save every plot returned by mash_plot(). Each iteration used to
# write to the same file, so only the last plot survived on disk. A single
# plot keeps the original file name; several plots get a numeric suffix.
for (i in seq_along(mash_p)) {
    mash_p_tmp <- mash_p[[i]]
    print(mash_p_tmp)
    mash_file <- if (length(mash_p) == 1) {
        'sec4.mash_posterior.pdf'
    } else {
        paste0('sec4.mash_posterior_', i, '.pdf')
    }
    ggsave(filename = file.path('plots', gene_name, mash_file), plot = mash_p_tmp, height = 6, width = 7)
}

# Read one multi-context result file. The file name is `path_prefix` followed
# by "<chr>_<gene id>.multicontext_bvsr.rds". If anything in the read fails,
# `load_error_msg` is emitted with message() and NULL is returned.
read_multicontext_fit <- function(path_prefix, load_error_msg, chrom_label, region) {
    tryCatch(
        readRDS(paste0(path_prefix, chrom_label, '_', region, '.multicontext_bvsr.rds')),
        error = function(e) message(load_error_msg)
    )
}

# ROSMAP
message("Multi context in ROSMAP data")

multi_context_rosmap_tmp <- read_multicontext_fit(
    '/data/analysis_result/multi_context/ROSMAP/mnm/ROSMAP_DeJager.',
    'Error in loading ROSMAP multi context data',
    target_gene_info$gene_info$`#chr`,
    gene_id
)
if (is.null(multi_context_rosmap_tmp[[1]]$mvsusie_fitted)) {
    message('Multi Context results are empty in ROSMAP data')
} else {
    plot_and_save(multi_context_rosmap_tmp[[1]], 'plots/BIN1/sec4.multi_context_ROSMAP')
}

# MSBB
message("Multi context in MSBB data")

multi_context_msbb_tmp <- read_multicontext_fit(
    '/data/analysis_result/multi_context/MSBB/mnm/MSBB_eQTL.',
    'Error in loading MSBB multi context data',
    target_gene_info$gene_info$`#chr`,
    gene_id
)
if (is.null(multi_context_msbb_tmp[[1]]$mvsusie_fitted)) {
    message('Multi Context results are empty in MSBB data')
} else {
    plot_and_save(multi_context_msbb_tmp[[1]], 'plots/BIN1/sec4.multi_context_MSBB')
}

# Draw the TWAS results for the target gene, then write a PDF.
# ggsave() receives no plot here, so it saves ggplot2's last plot.
plot_TWAS_res(gene_name = gene_name, gene_id = gene_id)

ggsave(filename = 'plots/BIN1/sec5.TWAS_BIN1.pdf', height = 4, width = 6)

# Flattened multi-gene / multi-context table built from the exported RDS file.
multigene_flat <- get_multigene_multicontext_flatten(
    'Fungen_xQTL_allQTL.overlapped.gwas.export.BIN1.rds',
    sQTL = 'no_MSBB'
)
multigene_flat

# Window ids: the comma-separated `TADB_id` field of the gene record, split
# into a character vector.
sliding_windows <- as.character(unlist(strsplit(target_gene_info$gene_info$TADB_id, ',')))
sliding_windows

# Main loop: for every window id, load each multi-gene result file found for
# that window, plot it when the target gene is among the fitted conditions,
# and collect every successfully loaded result in `mnm_gene`.
mnm_gene <- list()
for (window in sliding_windows) {
    # The suffix is matched literally with fixed(); as a regex its dots would
    # match any character.
    # NOTE(review): the suffix reads 'bvrs' while the multi-context files use
    # 'bvsr' — confirm the on-disk naming; a wrong suffix matches no file.
    context_files <- list.files('/data/analysis_result/multi_gene/ROSMAP/mnm_genes/', window, full.names = TRUE) %>%
        .[str_detect(., fixed('.multigene_bvrs.rds'))]
    for (context_file in context_files) {
        # Context label = file name up to the first dot.
        context_mnm <- str_split(basename(context_file), '[.]', simplify = TRUE)[, 1]
        # Load multi-gene data; report a failed read instead of dropping it silently.
        mnm_gene_tmp <- tryCatch(
            readRDS(context_file),
            error = function(e) {
                message('Could not read ', context_file, ': ', conditionMessage(e))
                NULL
            }
        )

        if (!is.null(mnm_gene_tmp)) {
            # Check if target gene is in the condition names
            if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp[[1]]$mvsusie_fitted$condition_names) {
                # Use a common prefix format for multi-gene plots
                # NOTE(review): the same prefix is used for every file, so later
                # files may overwrite earlier output — confirm in plot_and_save().
                plot_and_save(mnm_gene_tmp[[1]], 'plots/BIN1/sec6.multigene')
            } else {
                message('There is mnm result for TAD window ', window, ' in ', context_mnm,
                        ', but it does not include target gene ', gene_name, ' in CS.')
            }
            # Append to the results list
            mnm_gene[[length(mnm_gene) + 1]] <- mnm_gene_tmp
        }
    }
}

# Large canvas for the faceted effect plot below.
options(repr.plot.width = 40, repr.plot.height = 40)

# Faceted plot of estimated effects and PIPs, one row of panels per
# cs_coverage_0.95 / study / region combination; the row labeller turns
# "_", ":", "," and "-" in the strip labels into line breaks.
# NOTE(review): `effect_of_interest`, `flatten_table` and `tar_gene_info` are
# not defined in the visible code (the gene record loaded earlier is named
# `target_gene_info`) — confirm they exist before running this cell.
# NOTE(review): theme() and ylab() are each set twice in this chain; the later
# calls override the overlapping settings of the earlier ones.
 ggplot() + theme_bw() +  facet_grid(cs_coverage_0.95+study + region ~ ., labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)), scale = "free_y") +

      theme(text = element_text(size = 20), strip.text.y = element_text(size = 25, angle = 0.5)) +
     # xlim(view_win) +
      ylab("Estimated effect") +
   #   geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
    #            aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  # Effect curve (fun_plot over x) and PIP points (pip over pos), coloured by credible set.
  geom_line(data = effect_of_interest ,
                aes_string(y = "fun_plot", x = "x", col = "cs_coverage_0.95"), size = 4) +  
    geom_point(data = effect_of_interest ,
                aes_string(y = "pip", x = "pos", col = "cs_coverage_0.95"), size = 4) +
    theme(text = element_text(size = 40), strip.text.y = element_text(size = 15, angle = 0.5), 
            axis.text.x = element_text(size = 40), axis.title.x = element_text(size = 40)) +
      xlab("Position") +
      ylab("Estimated\neffect") +
      # Gene body drawn as an arrow from gene_start to gene_end at y = 1, with the gene name at its midpoint.
      geom_segment(arrow = arrow(length = unit(1, "cm")), aes(x = gene_start, xend = gene_end, y = 1, yend = 1), size = 6,
                  data = tar_gene_info$gene_info, alpha = 0.3) +
      geom_text(aes(x = (gene_start + gene_end) / 2, y = 1 , label = gene_name), size = 10, 
              data = tar_gene_info$gene_info)+
        # Red points: rows of flatten_table whose study contains "AD_" and whose cs_coverage_0.95 is non-zero.
        # The facet columns are dropped from this layer — presumably so the points appear in every panel (confirm).
        geom_point(aes(x = pos, y = pip  ) ,color = "red", data = flatten_table%>%filter( str_detect(study,"AD_") , cs_coverage_0.95 != 0  )%>%mutate(AD_study = study%>%str_replace_all("_","\n" ))%>%select(-study,-region,-cs_coverage_0.95) )

# Fine-mapping contexts saved by sec1 for this gene.
finempping_contexts <- readRDS(
    paste0(gene_name, '_finemapping_contexts.rds')
)

# Pass them through get_norosmap_contexts() before plotting.
finempping_contexts <- get_norosmap_contexts(finempping_contexts)

# ColocBoost plot with those contexts supplied as `cohorts` (add_QTL = TRUE).
cb_contexts <- plot_cb(
    cb_res = cb_res,
    cex.pheno = 1.5,
    x.phen = -0.2,
    add_QTL = TRUE,
    cohorts = finempping_contexts,
    gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Write the plot with the fine-mapping contexts to a PDF in the per-gene
# folder. `finally` closes the device even when replayPlot() fails, so a failed
# replay cannot leave the PDF device open for the rest of the session.
pdf(file.path('plots', gene_name, 'sec8.colocboost_res_fmp_contexts.pdf'), width = 10, height = 2 * cb_plot_row(cb_contexts$cb_res))
invisible(tryCatch(replayPlot(cb_contexts$p), finally = dev.off()))

# Same plot for the marginal (projected) result; `cb_marginal` only exists
# when `run_marginal` was TRUE in the marginal-projection cell.
if (run_marginal) {
    cb_marginal_contexts <- plot_cb(cb_res = cb_marginal$cb_res, cex.pheno = 1.5, x.phen = -0.2, add_QTL = TRUE, cohorts = finempping_contexts, gene_id = gene_id)
    pdf(file.path('plots', gene_name, 'sec8.colocboost_res_marginal_fmp_contexts.pdf'), width = 10, height = 2 * cb_plot_row(cb_marginal_contexts$cb_res))
    invisible(tryCatch(replayPlot(cb_marginal_contexts$p), finally = dev.off()))
} else {
    message('Original coloc results already include AD coloc set.\nTemporarily skip processing fake AD csets, or set `run_marginal = TRUE` before running this cell.')
}

# The following bare names display objects that are not created in the visible
# code. NOTE(review): presumably plots/tables produced by earlier steps or by
# the sourced helper scripts — confirm they exist before running these cells.
int_apoe_p

quant_coef_colocvar

vars_p

apoe_p

func_p

options(repr.plot.width = 12, repr.plot.height = 6)

# PIP per gene for the trans fine-mapping table `flat_var`.
# NOTE(review): `flat_var` is not created in the visible code — confirm it exists.
if (!is.null(flat_var)) {
    # Inside aes(), `gene_name` refers to the column of `flat_var`; in the
    # title it is the notebook-wide gene symbol.
    trans_p <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
        geom_point(alpha = 0.7) +
        labs(title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
             x = "Gene Name",
             y = "PIP",
             size = "PIP",
             color = "CS Coverage 0.95 Min Corr") +
        theme_minimal(base_size = 14) +
        theme(panel.background = element_blank(),
              panel.grid.major = element_line(color = "grey80"),
              legend.position = NULL,
              axis.text.x = element_text(angle = 45, hjust = 1))
        # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

    # The plot used to be a non-final expression inside the braces, so it was
    # never displayed; print it explicitly and hand the same object to ggsave()
    # instead of relying on the implicit last plot.
    print(trans_p)
    ggsave(file.path('plots', gene_name, paste0('sec12.trans_fine_mapping_', gene_name, '.pdf')),
           plot = trans_p, height = 5, width = 8)
} else {
    message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000136717	chr2	123880000	130720000	127107287	chr2:122654970-124537054,chr2:124537054-125689597,chr2:125689597-127728648,chr2:127728648-129107569,chr2:129107569-130787741	2_122654970-124537054,2_124537054-125689597,2_125689597-127728648,2_127728648-129107569,2_129107569-130787741	2_122654970-124537054,2_124537054_125689597,2_125689597_127728648,2_127728648_129107569,2_129107569_130787741	TADB_175,TADB_176,TADB_177	chr2_123011984_128107288,chr2_126048027_131718831,chr2_126535801_133037993	127107288	127048027	chr2:116754139-124869570,chr2:118302225-128107288,chr2:120737102-131718831,chr2:123011984-133037993,chr2:126048027-134596399,chr2:126535801-135959342,chr2:130104846-136876443	BIN1

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
AC; AC_unproductive	1.0000000	1	0.9873458	15628	chr2:127076883:T:C	chr2:127076883:T:C	coloc_sets:Y8_Y13:CS1
Exc; DLPFC; AC; PCC; AC_unproductive	0.9662879	4	0.3171031	15715; 15747; 15752; 15882	chr2:127092925:T:C; chr2:127101168:A:G; chr2:127101865:A:G; chr2:127123057:C:T	chr2:127092925:T:C	coloc_sets:Y5_Y7_Y8_Y9_Y13:CS2
Mic; AD_Bellenguez_2022	0.9994440	2	0.9999998	15981; 15986	chr2:127135234:C:T; chr2:127136522:G:A	chr2:127135234:C:T	coloc_sets:Y1_Y18:MergeCS1

	variants	AC	AC_unproductive
	<chr>	<dbl>	<dbl>
chr2:127076883:T:C	chr2:127076883:T:C	-9.528189	-11.59702

	variants	Exc	DLPFC	AC	PCC	AC_unproductive
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
chr2:127092925:T:C	chr2:127092925:T:C	-4.707726	-3.887473	-12.33930	-5.675266	-10.31998
chr2:127101168:A:G	chr2:127101168:A:G	-4.707726	-3.887473	-12.33930	-5.675266	-10.31998
chr2:127101865:A:G	chr2:127101865:A:G	-4.707726	-3.887473	-12.33930	-5.675266	-10.31998
chr2:127123057:C:T	chr2:127123057:C:T	-4.739743	-3.679612	-11.52623	-5.871070	-10.49521

	variants	Mic	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>
chr2:127135234:C:T	chr2:127135234:C:T	9.153807	20.07143
chr2:127136522:G:A	chr2:127136522:G:A	8.345345	14.34444

Case study: BIN1 xQTL and AD GWAS¶

Overview¶

Computing environment setup¶

How to Use This Notebook¶

Section 0: Sanity check ¶

Check the basic information of the gene¶

Check the existing results which are inputs to this analysis¶

Section 1: Fine-mapping for xQTL and GWAS ¶

from ADxQTL summary table¶

Section 2: Multi-context colocalization with Bellenguez 2022 ¶

Original output from ColocBoost¶

Projecting Coloc Sets for Marginal Performance¶

Section 3: Refinement of colocalized loci with other AD GWAS ¶

Projecting Bellenguez Coloc Sets onto other AD Datasets' Marginal Results¶

Projecting the Most Popular Coloc Set onto other AD Datasets' Marginal Results¶

Section 4: Assessment of multi-context xQTL effect sizes ¶

Option 1: ColocBoost + MASH¶

Option 2: mvSuSiE¶

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)¶

TWAS results¶

MR results¶

cTWAS results¶

Section 6: Context specific multi-gene fine-mapping ¶

A quick analysis: using the xQTL-AD summary table (flatten table)¶

A statistically solid approach: mvSuSiE multi-gene analysis¶

Section 7: Epigenomic QTL and their target regions ¶

Generate a crude plot to determine whether the story is interesting¶

Section 8: Context focused validation in other xQTL data ¶

Projecting Bellenguez Coloc Sets onto other xQTLs' Marginal Results¶

Projecting the Most Popular Coloc Set onto other xQTLs' Marginal Results¶

Section 9: Non-linear effects of xQTL ¶

APOE interaction¶

Quantile QTL¶

Section 10: in silico functional studies in iPSC model ¶

Section 11: Functional annotations of selected loci ¶

Section 12: Candidate loci as trans-xQTL ¶

Creative thinking: generate hypothesis, search in literature, raise questions to discuss¶

coloc_csets_1	coloc_csets_2	min_abs_cor	max_abs_cor	median_abs_cor
coloc_sets:Y8_Y13:CS1	coloc_sets:Y5_Y7_Y8_Y9_Y13:CS2	0.635484450603831	0.650396425065784	0.650396425065784
coloc_sets:Y8_Y13:CS1	coloc_sets:Y1_Y18:MergeCS1	0.08178432445715	0.298153818017145	0.189969071237148
coloc_sets:Y5_Y7_Y8_Y9_Y13:CS2	coloc_sets:Y1_Y18:MergeCS1	0.216487087638644	0.543568774101822	0.377065075053725