# Shell commands (not R): install the r-pecotmr and r-bedmatrix packages into
# the micromamba environment named `r_libs`.
micromamba install -n r_libs r-pecotmr
micromamba install -n r_libs r-bedmatrix

# Source the project helper scripts, then every R source file found in the
# local colocboost checkout.
source('/data/interactive_analysis/rf2872/codes/xqtl-paper/codes/cb_plot.R')
source('/data/interactive_analysis/rf2872/codes/xqtl-paper/codes/utilis.R')
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored: only file names ending in ".R" are sourced (an unanchored ".R"
# would also pick up names such as "notes.Rmd").
for (file in list.files("/data/colocalization/colocboost/R", pattern = "\\.R$", full.names = TRUE)) {
  source(file)
}
# Gene symbol that drives the rest of the script.
gene_name <- 'TREM2'

# Per-gene figure directory. Only create it when it is missing so that
# re-running the script does not raise an "already exists" warning, while a
# genuine creation failure is still reported.
if (!dir.exists(paste0('plots/', gene_name))) {
  dir.create(paste0('plots/', gene_name), recursive = TRUE)
}

# Look up the gene record via get_gene_info() (defined outside this file) and
# print it for inspection.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Identifiers read from the `gene_info` element of the lookup result.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# Source a utilis.R located directly under .../codes/ (a different path from
# the xqtl-paper copy); any objects it defines replace same-named ones already
# in the session.
# NOTE(review): confirm that sourcing both copies is intended.
source('/data/interactive_analysis/rf2872/codes/utilis.R')
expression_in_rosmap_bulk(target_gene_info)

# `region_p` and `pip_p` are printed here but are not created anywhere in the
# visible code — presumably plot objects produced by an earlier step; TODO confirm.
region_p

pip_p

# Load the ColocBoost result for the target gene.
cb_res <- readRDS(paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds"))

# Save the ColocBoost summary table.
cb_res_table <- get_cb_summary(cb_res)

saveRDS(cb_res_table, paste0(gene_name, "_colocboost_res.rds"))

cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Write the recorded plot to the per-gene figure directory. The directory is
# derived from `gene_name` (matching the directory created for this gene)
# instead of a hard-coded 'plots/TREM2/'. `finally` guarantees the PDF device
# is closed even if replaying the plot fails.
pdf(paste0('plots/', gene_name, '/sec2.colocboost_res.pdf'), width = 10, height = 5)
invisible(tryCatch(
  replayPlot(cb$p),
  finally = dev.off()
))

# Colocalized variants: print the summary table.
cb_res_table

# Effect signs for each colocalization set.
get_effect_sign_csets(cb_res)

# LD between colocalization sets; `path` points at the eQTL data directory
# passed to the helper.
get_between_purity_simple(cb_res, gene.name = gene_id, path = '/data/colocalization/QTL_data/eQTL/')

# AD GWAS cohort labels handed to plot_cb() through `cohorts`.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)

# Same ColocBoost plot as before, now with the GWAS cohorts added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Write the recorded AD-overlay plot to the per-gene figure directory. The
# directory is derived from `gene_name` rather than hard-coded, and `finally`
# closes the PDF device even if replaying the plot fails.
pdf(paste0('plots/', gene_name, '/sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
invisible(tryCatch(
  replayPlot(cb_ad$p),
  finally = dev.off()
))

plot_TWAS_res(gene_id = gene_id)

# Read the flattened xQTL/GWAS export for the target gene. The file name is
# built from `gene_name` so the step follows the gene chosen at the top of the
# script instead of a hard-coded 'TREM2'.
multigene_flat <- get_multigene_multicontext_flatten(
  paste0('Fungen_xQTL_allQTL.overlapped.gwas.export.', gene_name, '.rds'),
  sQTL = 'no_MSBB'
)
multigene_flat

# The gene record stores its sliding windows as one comma-separated string;
# split it into a character vector with one window per element.
sliding_windows <- as.character(
  unlist(strsplit(target_gene_info$gene_info$sliding_windows, ','))
)
sliding_windows

# For every sliding window of the target gene, load the multi-gene mvSuSiE
# result (when one can be read), plot it if the target gene is among the
# fitted conditions, and collect every loaded result in `mnm_gene`.
mnm_gene <- list()
for (window in sliding_windows) {
  mnm_path <- paste0(
    '/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.',
    window, '.mnm.rds'
  )

  # A window without a readable result is skipped (best effort), but the
  # reason is reported instead of being silently discarded.
  mnm_gene_tmp <- tryCatch(
    readRDS(mnm_path),
    error = function(e) {
      message('Skipping sliding window ', window, ': cannot read ', mnm_path,
              ' (', conditionMessage(e), ')')
      NULL
    }
  )
  if (is.null(mnm_gene_tmp)) {
    next
  }

  if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
    tryCatch({
      p <- mvsusieR::mvsusie_plot(mnm_gene_tmp$mvsusie_fitted, sentinel_only = FALSE, add_cs = TRUE)
      print(p)  # This ensures the plot is displayed in JupyterLab
    }, error = function(e) {
      # Plotting stays non-fatal, but the failure is surfaced to the user.
      message('Could not plot mvSuSiE result for sliding window ', window,
              ': ', conditionMessage(e))
      NULL
    })
  } else {
    message('There is mnm result for sliding window ',window,', but not include target gene ', gene_name, ' in CS')
  }

  # Keep every successfully loaded result, in window order.
  mnm_gene[[length(mnm_gene) + 1L]] <- mnm_gene_tmp
}

$pip_plot

$effect_plot

$z_plot
NULL

$effects
                         L1
ENSG00000065060 6.14539e-07
ENSG00000124772 6.14539e-07
ENSG00000146192 6.14539e-07
ENSG00000137193 6.14539e-07
ENSG00000095970 6.14539e-07

$pip_plot

$effect_plot

$z_plot
NULL

$effects
                        L2            L3         L1
ENSG00000124772 -0.3438072 -4.032918e-08 5.3144e-11
ENSG00000146192 -0.3438072 -4.032918e-08 5.3144e-11
ENSG00000095970 -0.3438072 -4.032918e-08 5.3144e-11
ENSG00000112576 -0.3438072 -4.032918e-08 5.3144e-11
ENSG00000124587 -0.3438072 -4.032918e-08 5.3144e-11
ENSG00000137221 -0.3438072 -4.032918e-08 5.3144e-11

$pip_plot

$effect_plot

$z_plot
NULL

$effects
                          L1
ENSG00000095970 0.0002645928
ENSG00000112576 0.0002645928
ENSG00000137413 0.0002645928
ENSG00000124587 0.0002645928
ENSG00000137221 0.0002645928
ENSG00000171453 0.0002645928

options(repr.plot.width = 40, repr.plot.height = 40)

# Faceted plot (one row per cs_coverage_0.95 / study / region combination) of
# the estimated effect curves and PIPs in `effect_of_interest`, with the target
# gene drawn as an arrow and the AD rows of `flatten_table` overlaid in red.
#
# Changes from the previous version:
#   * gene annotation layers read `target_gene_info$gene_info`; the former
#     `tar_gene_info` is not defined anywhere in the visible code
#   * `scales =` is spelled out instead of relying on partial matching of `scale =`
#   * deprecated aes_string() replaced by aes()
#   * the first theme()/ylab() calls were dropped: every element they set was
#     overridden by the later theme()/ylab() calls, so the figure is unchanged
ggplot() +
  theme_bw() +
  facet_grid(
    cs_coverage_0.95 + study + region ~ .,
    labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
    scales = "free_y"
  ) +
  # xlim(view_win) +
  # geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
  #           aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  geom_line(
    data = effect_of_interest,
    aes(y = fun_plot, x = x, col = cs_coverage_0.95),
    size = 2
  ) +
  geom_point(
    data = effect_of_interest,
    aes(y = pip, x = pos, col = cs_coverage_0.95),
    size = 10
  ) +
  theme(
    text = element_text(size = 40),
    strip.text.y = element_text(size = 15, angle = 0.5),
    axis.text.x = element_text(size = 40),
    axis.title.x = element_text(size = 40)
  ) +
  xlab("Position") +
  ylab("Estimated\neffect") +
  geom_segment(
    aes(x = gene_start, xend = gene_end, y = 1, yend = 1),
    data = target_gene_info$gene_info,
    arrow = arrow(length = unit(1, "cm")),
    size = 6,
    alpha = 0.3
  ) +
  geom_text(
    aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name),
    data = target_gene_info$gene_info,
    size = 10
  ) +
  # The facet columns are removed from the AD rows before plotting them.
  geom_point(
    aes(x = pos, y = pip),
    color = "red",
    data = flatten_table %>%
      filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
      mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
      select(-study, -region, -cs_coverage_0.95)
  )

# Reload the fine-mapping contexts written in section 1 for this gene, then
# pass them through get_norosmap_contexts() (defined outside this file).
finempping_contexts <- readRDS(
  paste0(gene_name, '_finemapping_contexts.rds')
)
finempping_contexts <- get_norosmap_contexts(finempping_contexts)

# ColocBoost plot with those contexts added as QTL cohorts.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  cohorts = finempping_contexts,
  gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# Interaction q-value per variant in `TREM2_int_res`.
# Changes from the previous version: title typo fixed ("nalysis"), x axis
# labelled as variants (x is `variant_id`, not a gene name), the label for the
# unmapped `size` aesthetic and the no-op `legend.position = NULL` removed.
interaction_qvalue_plot <- ggplot(TREM2_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(
    title = "qvalue for TREM2 csets in interaction association analysis",
    x = "Variant",
    y = "qvalue_interaction"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(color = "grey80"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  ylim(0, 1)
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))
interaction_qvalue_plot

# Save into the per-gene figure directory and pass the plot explicitly rather
# than relying on whichever plot was created last.
ggsave(
  paste0('plots/', gene_name, '/sec11.interaction_association_TREM2_lessPIP25.pdf'),
  plot = interaction_qvalue_plot,
  height = 5,
  width = 8
)

# NOTE(review): the bare `FIXME` lines in this section are unfinished
# placeholders, not comments; evaluated as R they refer to an object named
# FIXME. The code that should replace them is not present in the visible source.
FIXME

# `vars_p` and `apoe_p` are printed here but are not created in the visible
# code — presumably plot objects from the missing step; TODO confirm.
vars_p

apoe_p

FIXME

# `func_p` is likewise not created in the visible code.
func_p

options(repr.plot.width = 12, repr.plot.height = 6)

# PIP per gene for the trans fine-mapping table `flat_var`.
# A NULL or empty table means there is nothing to plot.
if (!is.null(flat_var) && NROW(flat_var) > 0) {
  trans_pip_plot <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
    geom_point(alpha = 0.7) +
    labs(
      title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
      x = "Gene Name",
      y = "PIP",
      size = "PIP"
    ) +
    theme_minimal(base_size = 14) +
    theme(
      panel.background = element_blank(),
      panel.grid.major = element_line(color = "grey80"),
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
    # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

  # A plot built inside `if { }` is not auto-printed, so print it explicitly;
  # otherwise the repr.plot options set above have no effect.
  print(trans_pip_plot)

  # Save into the per-gene figure directory, passing the plot explicitly.
  ggsave(
    paste0('plots/', gene_name, '/sec12.trans_fine_mapping_', gene_name, '.pdf'),
    plot = trans_pip_plot,
    height = 5,
    width = 8
  )
} else {
  message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000095970	chr6	38720000	42163186	41163185	chr6:38719920-40377803,chr6:40377803-42070711,chr6:42070711-43914875	6_38719920-40377803,6_40377803-42070711,6_42070711-43914875	6_38719920-40377803,6_40377803_42070711,6_42070711_43914875	TADB_534,TADB_535	chr6_37715310_42754109,chr6_40335607_44640941	41163186	41158506	chr6:28657001-40030792,chr6:32277122-42754109,chr6:34258981-44640941,chr6:37715310-45450425,chr6:40335607-47080348	TREM2

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
DLPFC; AC; PCC	1.0000000	2	0.6224119	12431; 12426	chr6:41272084:C:T; chr6:41271095:C:A	chr6:41272084:C:T	coloc_sets:Y2_Y3_Y4:CS2
DLPFC; AC	0.8592258	5	0.6747348	11902; 11925; 11924; 11932; 11934	chr6:41190851:GTGTCTGTCTGTC:G; chr6:41192167:A:G; chr6:41192063:A:G; chr6:41194780:T:C; chr6:41194977:G:A	chr6:41194780:T:C	coloc_sets:Y2_Y3:CS1

	variants	DLPFC	AC	PCC
	<chr>	<dbl>	<dbl>	<dbl>
chr6:41272084:C:T	chr6:41272084:C:T	-6.16115	-6.932423	-6.331731
chr6:41271095:C:A	chr6:41271095:C:A	-6.16115	-6.876965	-6.293139

	variants	DLPFC	AC
	<chr>	<dbl>	<dbl>
chr6:41190851:GTGTCTGTCTGTC:G	chr6:41190851:GTGTCTGTCTGTC:G	6.533228	6.048445
chr6:41192167:A:G	chr6:41192167:A:G	6.817910	5.685052
chr6:41192063:A:G	chr6:41192063:A:G	6.680812	5.685052
chr6:41194780:T:C	chr6:41194780:T:C	7.133251	5.301670
chr6:41194977:G:A	chr6:41194977:G:A	7.044687	5.375782

gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000001167	chr6	41072944	41072945	NFYA	MiGA_GTS_eQTL
ENSG00000044090	chr6	43053942	43053943	CUL7	ROSMAP_PCC_sQTL
ENSG00000096070	chr6	36196743	36196744	BRPF3	MiGA_GTS_eQTL
ENSG00000112167	chr6	39115185	39115186	SAYSD1	DLPFC_DeJager_eQTL
ENSG00000112195	chr6	41201148	41201149	TREML2	monocyte_ROSMAP_eQTL,STARNET_eQTL
ENSG00000112561	chr6	41736258	41736259	TFEB	MiGA_GTS_eQTL,MiGA_SVZ_eQTL
ENSG00000112576	chr6	42050356	42050357	CCND3	STARNET_eQTL
ENSG00000112578	chr6	41921498	41921499	BYSL	MiGA_THA_eQTL
ENSG00000112599	chr6	42194955	42194956	GUCA1B	MiGA_THA_eQTL
ENSG00000112619	chr6	42722596	42722597	PRPH2	BM_44_MSBB_eQTL
ENSG00000112658	chr6	43171268	43171269	SRF	MiGA_THA_eQTL
ENSG00000112659	chr6	43182184	43182185	CUL9	MiGA_THA_eQTL
ENSG00000124593	chr6	41780348	41780349	AL365205.1	ROSMAP_DLPFC_sQTL
ENSG00000124596	chr6	41097786	41097787	OARD1	MiGA_GTS_eQTL,ROSMAP_PCC_sQTL
ENSG00000124602	chr6	41039220	41039221	UNC5CL	MiGA_GFM_eQTL
ENSG00000124615	chr6	39934550	39934551	MOCS1	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000124688	chr6	43629539	43629540	MAD2L1BP	MiGA_SVZ_eQTL
ENSG00000124701	chr6	41053201	41053202	APOBEC2	MiGA_GFM_eQTL
ENSG00000124702	chr6	43014102	43014103	KLHDC3	MiGA_GFM_eQTL,MiGA_THA_eQTL
ENSG00000124731	chr6	41286681	41286682	TREM1	MiGA_GTS_eQTL,MiGA_SVZ_eQTL,STARNET_eQTL
ENSG00000137166	chr6	41546380	41546381	FOXP4	MiGA_SVZ_eQTL
ENSG00000137171	chr6	43040776	43040777	KLC4	ROSMAP_PCC_sQTL
ENSG00000137218	chr6	41786541	41786542	FRS3	ROSMAP_DLPFC_sQTL
ENSG00000146122	chr6	39792297	39792298	DAAM2	MiGA_GTS_eQTL,Ast_DeJager_eQTL,Oli_DeJager_eQTL,Ast_mega_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000146223	chr6	42879615	42879616	RPL7L1	ROSMAP_DLPFC_sQTL
ENSG00000156564	chr6	40587363	40587364	LRFN2	MiGA_GTS_eQTL
ENSG00000164627	chr6	39725407	39725408	KIF6	Oli_DeJager_eQTL,Oli_mega_eQTL,ROSMAP_PCC_sQTL
ENSG00000183826	chr6	38640147	38640148	BTBD9	ROSMAP_DLPFC_sQTL
ENSG00000188056	chr6	41228338	41228339	TREML4	MiGA_GFM_eQTL,MiGA_GTS_eQTL,MiGA_SVZ_eQTL,MiGA_THA_eQTL,BM_22_MSBB_eQTL,BM_44_MSBB_eQTL,STARNET_eQTL
ENSG00000198663	chr6	36871869	36871870	C6orf89	MiGA_THA_eQTL
ENSG00000204052	chr6	43510685	43510686	LRRC73	MiGA_SVZ_eQTL
ENSG00000221821	chr6	42890820	42890821	C6orf226	BM_10_MSBB_eQTL
ENSG00000278224	chr6	41780781	41780782	PRICKLE4	ROSMAP_DLPFC_sQTL

Case study: TREM2 xQTL and AD GWAS

Overview

Computing environment setup

How to Use This Notebook

Section 0: Sanity check

Check the basic information of the gene

Check the existing results which are inputs to this analysis

Section 1: Fine-mapping for xQTL and GWAS

Section 2: Multi-context colocalization with Bellenguez 2022

Section 3: Refinement of colocalized loci with other AD GWAS

Section 4: Assessment of multi-context xQTL effect sizes

Option 1: ColocBoost + MASH

Option 2: mvSuSiE

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)

TWAS results

MR results

cTWAS results

Section 6: Context specific multi-gene fine-mapping

A quick analysis: using the xQTL-AD summary table (flatten table)

A statistically solid approach: mvSuSiE multi-gene analysis

Section 7: Epigenomic QTL and their target regions

Generate a crude plot to determine whether the story is interesting

Section 8: Context focused validation in other xQTL data

Section 9: Non-linear effects of xQTL

APOE interaction

Section 10: in silico functional studies in iPSC model

Section 11: Functional annotations of selected loci

Section 12: Candidate loci as trans-xQTL

Creative thinking: generate hypothesis, search in literature, raise questions to discuss