# Shell commands (not R): install the r-pecotmr and r-bedmatrix packages into
# the micromamba environment named `r_libs`. Run these in a terminal, not in R.
micromamba install -n r_libs r-pecotmr
micromamba install -n r_libs r-bedmatrix

# Load the plotting and utility helpers used throughout this analysis.
source('/data/interactive_analysis/rf2872/codes/cb_plot.R')
source('/data/interactive_analysis/rf2872/codes/utilis.R')

# Source every R script in the colocboost checkout.
# `pattern` is a regular expression, so the extension is escaped and anchored:
# the previous ".R" matched any character followed by "R" anywhere in the name.
for (file in list.files("/data/colocalization/colocboost/R", pattern = "\\.R$", full.names = TRUE)) {
  source(file)
}
# Gene symbol that drives the rest of the analysis.
gene_name <- 'CR2'

# Per-gene output directory for figures. showWarnings = FALSE keeps re-runs
# quiet when the directory already exists.
dir.create(paste0('plots/', gene_name), recursive = TRUE, showWarnings = FALSE)

# Look up the gene record and print it for inspection.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Identifiers reused below: the `region_id` and `#chr` columns of gene_info.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# Re-source the utility helpers (this file is also sourced near the top of the
# script), then run the ROSMAP bulk expression helper for the target gene.
source('/data/interactive_analysis/rf2872/codes/utilis.R')
expression_in_rosmap_bulk(target_gene_info)

# Print `region_p`. It is not defined in the visible part of this script --
# presumably a plot object created in an earlier step; TODO confirm.
region_p

# Print `pip_p`. Same caveat as `region_p` above: defined elsewhere; TODO confirm.
pip_p

# Load the ColocBoost result object for the target gene.
cb_res <- readRDS(paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds"))

# Summarise the ColocBoost result and save the summary table next to the
# notebook, named after the gene.
cb_res_table <- get_cb_summary(cb_res)
saveRDS(cb_res_table, paste0(gene_name, "_colocboost_res.rds"))

cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Replay the recorded plot into a PDF. The directory is derived from
# `gene_name` (previously hard-coded as 'plots/CR2/') so it matches the
# directory created for the gene.
pdf(file.path('plots', gene_name, 'sec2.colocboost_res.pdf'), width = 10, height = 5)
replayPlot(cb$p)
dev.off()

# Colocalized variants: print the summary table built by get_cb_summary().
cb_res_table

# Effect sign for each colocalization set.
get_effect_sign_csets(cb_res)

# LD between colocalization sets; the helper is given the gene ID and the
# eQTL data directory.
get_between_purity_simple(cb_res, gene.name = gene_id, path = '/data/colocalization/QTL_data/eQTL/')

# AD GWAS cohorts to overlay on the ColocBoost plot, one per line.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)

# Redraw the ColocBoost plot with the GWAS cohorts added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.Error : File '/data/GWAS/ADGWAS_sumstats/1_205972031-208461272.RSS_QC_RAISS_imputed.AD_Wightman_Excluding23andMe_2021.sumstats.tsv.gz' does not exist or is non-readable. getwd()=='/data/interactive_analysis/hs3163/GIT/xqtl-paper/AD_targets/CR2'
No pvalue cutoff. Extract all variants names.Error : File '/data/GWAS/ADGWAS_sumstats/1_205972031-208461272.RSS_QC_RAISS_imputed.AD_Wightman_ExcludingUKBand23andME_2021.sumstats.tsv.gz' does not exist or is non-readable. getwd()=='/data/interactive_analysis/hs3163/GIT/xqtl-paper/AD_targets/CR2'
No pvalue cutoff. Extract all variants names.Error : File '/data/GWAS/ADGWAS_sumstats/1_205972031-208461272.RSS_QC_RAISS_imputed.AD_Wightman_Full_2021.sumstats.tsv.gz' does not exist or is non-readable. getwd()=='/data/interactive_analysis/hs3163/GIT/xqtl-paper/AD_targets/CR2'
No pvalue cutoff. Extract all variants names.

# Replay the AD-cohort ColocBoost plot into a PDF. The directory is derived
# from `gene_name` (previously hard-coded as 'plots/CR2/').
pdf(file.path('plots', gene_name, 'sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
replayPlot(cb_ad$p)
dev.off()

# TWAS results for the target gene.
plot_TWAS_res(gene_id = gene_id, gene_name = gene_name)

# Flattened multi-gene / multi-context table. The input file name is built
# from `gene_name` (previously hard-coded as '...export.CR2.rds').
multigene_flat <- get_multigene_multicontext_flatten(
  paste0('Fungen_xQTL_allQTL.overlapped.gwas.export.', gene_name, '.rds'),
  sQTL = 'no_MSBB'
)
multigene_flat

# `sliding_windows` is stored as one comma-separated string; split it into a
# character vector with one window per element.
sliding_windows <- as.character(unlist(strsplit(target_gene_info$gene_info$sliding_windows, ',')))
sliding_windows

# Collect the multi-gene result for every sliding window that has one, and
# plot those whose fitted conditions include the target gene.
mnm_gene <- list()
for (window in sliding_windows) {
    mnm_path <- paste0('/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.', window, '.mnm.rds')

    # A window whose result cannot be read is still skipped, but the reason is
    # now reported instead of being silently discarded.
    mnm_gene_tmp <- tryCatch(
        readRDS(mnm_path),
        error = function(e) {
            message('Skipping sliding window ', window, ': ', conditionMessage(e))
            NULL
        }
    )
    if (is.null(mnm_gene_tmp)) {
        next
    }

    if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
        # Plotting stays best-effort: a failure is reported and the loop goes on.
        tryCatch({
            p <- mvsusieR::mvsusie_plot(mnm_gene_tmp$mvsusie_fitted, sentinel_only = FALSE, add_cs = TRUE)
            print(p)  # This ensures the plot is displayed in JupyterLab
        }, error = function(e) {
            message('Could not plot mvSuSiE result for sliding window ', window, ': ', conditionMessage(e))
        })
    } else {
        message('There is mnm result for sliding window ',window,', but not include target gene ', gene_name, ' in CS')
    }

    # Keep every result that was read, whether or not it was plotted.
    mnm_gene <- append(mnm_gene, list(mnm_gene_tmp))
}

# Large canvas for the stacked facet plot.
options(repr.plot.width = 40, repr.plot.height = 40)

# Estimated effect curves and PIPs per credible set / study / region, with the
# target gene drawn as an arrow at y = 1 and AD variants overlaid in red.
# `effect_of_interest` and `flatten_table` must already exist in the session;
# they are not defined in the visible part of this script.
#
# Changes from the previous version:
#   * `scales = "free_y"` is spelled out (was `scale =`, which only worked
#     through partial argument matching).
#   * The first theme()/ylab() pair was removed: both were fully overridden by
#     the later theme()/ylab() calls, so the rendered plot is unchanged.
#   * aes_string() (deprecated) is replaced by aes() with the same columns.
#   * The gene annotation layers read `target_gene_info$gene_info`, the object
#     created by get_gene_info() earlier in this script; the previous name
#     `tar_gene_info` is not defined anywhere in the visible source.
#     NOTE(review): confirm no other cell defines a different `tar_gene_info`.
ggplot() +
  theme_bw() +
  facet_grid(
    cs_coverage_0.95 + study + region ~ .,
    # Break long strip labels at separators so they wrap onto several lines.
    labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
    scales = "free_y"
  ) +
  # xlim(view_win) +
  #   geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
  #            aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  geom_line(
    data = effect_of_interest,
    aes(y = fun_plot, x = x, col = cs_coverage_0.95),
    size = 2
  ) +
  geom_point(
    data = effect_of_interest,
    aes(y = pip, x = pos, col = cs_coverage_0.95),
    size = 10
  ) +
  theme(
    text = element_text(size = 40),
    strip.text.y = element_text(size = 15, angle = 0.5),
    axis.text.x = element_text(size = 40),
    axis.title.x = element_text(size = 40)
  ) +
  xlab("Position") +
  ylab("Estimated\neffect") +
  geom_segment(
    aes(x = gene_start, xend = gene_end, y = 1, yend = 1),
    data = target_gene_info$gene_info,
    arrow = arrow(length = unit(1, "cm")),
    size = 6,
    alpha = 0.3
  ) +
  geom_text(
    aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name),
    data = target_gene_info$gene_info,
    size = 10
  ) +
  # AD variants that belong to a credible set. The faceting columns are dropped
  # from this layer's data, so the red points are drawn in every facet.
  geom_point(
    aes(x = pos, y = pip),
    color = "red",
    data = flatten_table %>%
      filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
      mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
      select(-study, -region, -cs_coverage_0.95)
  )

# Read the fine-mapping contexts saved in section 1 and pass them through
# get_norosmap_contexts() before use.
finempping_contexts <- get_norosmap_contexts(
  readRDS(paste0(gene_name, '_finemapping_contexts.rds'))
)

# Redraw the ColocBoost plot with those contexts supplied as cohorts.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  cohorts = finempping_contexts,
  gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# Interaction q-value per variant. `CR2_int_res` must already exist in the
# session; it is not defined in the visible part of this script.
#
# Fixes relative to the previous version:
#   * title typo ("nalysis") corrected, and the gene symbol in the title and
#     output path now comes from `gene_name` instead of a hard-coded "CR2";
#   * the x-axis label matches the plotted column (variant_id, not gene name);
#   * the unused `size` label was dropped (size is a constant, not a mapping);
#   * the plot is kept in a variable and passed to ggsave() explicitly rather
#     than relying on the implicit "last plot".
# NOTE(review): `legend.position = NULL` is kept as-is; if the intent was to
# hide the legend the value should be "none" -- confirm.
interaction_plot <- ggplot(CR2_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(
    title = paste0("qvalue for ", gene_name, " csets in interaction association analysis"),
    x = "Variant ID",
    y = "qvalue_interaction"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(color = "grey80"),
    legend.position = NULL,
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  ylim(0, 1)
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

# Display the plot, then write the same object to disk.
print(interaction_plot)
ggsave(
  file.path('plots', gene_name, paste0('sec11.interaction_association_', gene_name, '_lessPIP25.pdf')),
  plot = interaction_plot,
  height = 5,
  width = 8
)

# Print three objects that are not defined in the visible part of this script
# -- presumably plot objects built in earlier steps; TODO confirm where each
# one is created.
vars_p

apoe_p

func_p

options(repr.plot.width = 12, repr.plot.height = 6)

# PIP per gene for the trans fine-mapping table. `flat_var` must already exist
# in the session; NULL is treated as "no trans signal".
#
# Fixes relative to the previous version:
#   * the plot was built inside `if { }` and never printed, so it was not
#     displayed; it is now printed explicitly;
#   * the output directory is derived from `gene_name` (it was hard-coded as
#     'plots/CR2/' while the file name already used `gene_name`);
#   * the unused `color` label was dropped (no colour aesthetic is mapped);
#   * ggsave() receives the plot object explicitly.
# NOTE(review): `legend.position = NULL` is kept as-is; if the intent was to
# hide the size legend the value should be "none" -- confirm.
if (!is.null(flat_var)) {
    trans_pip_plot <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
      geom_point(alpha = 0.7) +
      labs(
        title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
        x = "Gene Name",
        y = "PIP",
        size = "PIP"
      ) +
      theme_minimal(base_size = 14) +
      theme(
        panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey80"),
        legend.position = NULL,
        axis.text.x = element_text(angle = 45, hjust = 1)
      )
      # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

    print(trans_pip_plot)
    ggsave(
      file.path('plots', gene_name, paste0('sec12.trans_fine_mapping_', gene_name, '.pdf')),
      plot = trans_pip_plot,
      height = 5,
      width = 8
    )
} else {
    message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000117322	chr1	206120000	208489895	207454229	chr1:205972031-208461272,chr1:208461272-210906847	1_205972031-208461272,1_208461272-210906847	1_205972031-208461272,1_208461272_210906847	TADB_93,TADB_94,TADB_95	chr1_200829148_207464443,chr1_205117782_208795513,chr1_206496146_210857565	207454230	207489895	chr1:197638456-207464443,chr1:200359008-208795513,chr1:200829148-210857565,chr1:205117782-212690103,chr1:206496146-214015867	CR2

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
DLPFC; AD_Bellenguez_2022	0.9472933	7	0.6713393	5322; 5210; 5309; 5555; 5573; 5562; 5552	chr1:207577223:T:C; chr1:207510847:T:G; chr1:207573951:A:G; chr1:207629207:A:C; chr1:207633385:G:A; chr1:207630796:A:C; chr1:207627210:T:C	chr1:207577223:T:C	coloc_sets:Y1_Y2:CS1

	variants	DLPFC	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>
chr1:207577223:T:C	chr1:207577223:T:C	-6.347764	-11.96154
chr1:207510847:T:G	chr1:207510847:T:G	-6.310378	-11.86538
chr1:207573951:A:G	chr1:207573951:A:G	-6.286799	-11.60194
chr1:207629207:A:C	chr1:207629207:A:C	-6.007874	-11.90385
chr1:207633385:G:A	chr1:207633385:G:A	-6.060694	-11.64078
chr1:207630796:A:C	chr1:207630796:A:C	-6.060694	-11.63107
chr1:207627210:T:C	chr1:207627210:T:C	-6.060694	-11.60194

gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000076356	chr1	208244383	208244384	PLXNA2	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000123838	chr1	207104232	207104233	C4BPA	ROSMAP_AC_sQTL
ENSG00000123843	chr1	207088859	207088860	C4BPB	MiGA_SVZ_eQTL
ENSG00000143486	chr1	206612464	206612465	EIF2D	Exc_DeJager_eQTL
ENSG00000143858	chr1	202710453	202710454	SYT2	MiGA_SVZ_eQTL
ENSG00000162894	chr1	206923246	206923247	FCMR	MiGA_GTS_eQTL
ENSG00000180667	chr1	207052979	207052980	YOD1	MiGA_GFM_eQTL
ENSG00000196352	chr1	207321531	207321532	CD55	BM_10_MSBB_eQTL,BM_36_MSBB_eQTL,Oli_DeJager_eQTL,Exc_DeJager_eQTL,Oli_Kellis_eQTL,Exc_mega_eQTL,Oli_mega_eQTL,STARNET_eQTL
ENSG00000203710	chr1	207496146	207496147	CR1	Knight_eQTL,BM_10_MSBB_eQTL,BM_22_MSBB_eQTL,BM_36_MSBB_eQTL,BM_44_MSBB_eQTL,Oli_DeJager_eQTL,DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,Oli_Kellis_eQTL,Oli_mega_eQTL
ENSG00000263528	chr1	206470475	206470476	IKBKE	MiGA_GTS_eQTL,ROSMAP_PCC_sQTL
ENSG00000266094	chr1	206507530	206507531	RASSF5	MiGA_GTS_eQTL

Case study: CR2 xQTL and AD GWAS

Overview

Computing environment setup

How to Use This Notebook

Section 0: Sanity check

Check the basic information of the gene

Check the existing results which are inputs to this analysis

Section 1: Fine-mapping for xQTL and GWAS

Section 2: Multi-context colocalization with Bellenguez 2022

Section 3: Refinement of colocalized loci with other AD GWAS

Section 4: Assessment of multi-context xQTL effect sizes

Option 1: ColocBoost + MASH

Option 2: mvSuSiE

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)

TWAS results

MR results

cTWAS results

Section 6: Context specific multi-gene fine-mapping

A quick analysis: using the xQTL-AD summary table (flatten table)

A statistically solid approach: mvSuSiE multi-gene analysis

Section 7: Epigenomic QTL and their target regions

Section 8: Context focused validation in other xQTL data

Section 9: Non-linear effects of xQTL

APOE interaction

Section 10: in silico functional studies in iPSC model

Section 11: Functional annotations of selected loci

Section 12: Candidate loci as trans-xQTL

Creative thinking: generate hypothesis, search in literature, raise questions to discuss