micromamba install -n r_libs r-pecotmr

# Source the project helper scripts cb_plot.R and utilis.R.
# If an error occurs while sourcing these scripts, it might be because a get()
# call returned NULL. In that case, restart the kernel (or click the R kernel
# in the upper right corner) to resolve the issue.
source('../../codes/cb_plot.R')
source('../../codes/utilis.R')

# Source every R script found in the local colocboost checkout.
# `pattern` is a regular expression: the dot is escaped and the match is
# anchored at the end of the name, so only files ending in ".R" are sourced
# (an unescaped ".R" would also match names such as "notes.Rmd").
for (file in list.files("/data/colocalization/colocboost/R", pattern = "\\.R$", full.names = TRUE)) {
  source(file)
}
# Gene symbol analysed in this notebook; later cells build file names from it.
gene_name <- 'PLCG2'

# Create the per-gene output directory for plots. `showWarnings = FALSE` keeps
# re-runs quiet when the directory already exists.
dir.create(paste0('plots/', gene_name), recursive = TRUE, showWarnings = FALSE)

# Look up the annotation record of the target gene and display it.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Region identifier and chromosome of the target gene, read from the
# `gene_info` table of the lookup result.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# Re-source the utility script (it is also sourced once earlier in this file)
# before calling expression_in_rosmap_bulk() on the target gene record.
source('../../codes/utilis.R')
expression_in_rosmap_bulk(target_gene_info)

# Display `region_p`.
# NOTE(review): `region_p` is not assigned anywhere in the visible code;
# presumably it is created by an earlier step or a sourced helper - confirm.
region_p

# Display `pip_p`.
# NOTE(review): `pip_p` is likewise not assigned in the visible code - confirm
# where it comes from.
pip_p

# Load the ColocBoost result stored for this gene's region id.
cb_res <- readRDS(
  paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds")
)

# Build the ColocBoost summary table and save it next to the notebook, using
# the gene symbol in the file name.
cb_res_table <- get_cb_summary(cb_res)

saveRDS(cb_res_table, file = paste0(gene_name, "_colocboost_res.rds"))

# Draw the ColocBoost result plot; the recorded plot is kept in `cb$p`.
cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Replay the recorded plot into a PDF. The output directory is derived from
# `gene_name`, matching the directory created with dir.create() earlier,
# instead of a hard-coded gene symbol.
pdf(file.path('plots', gene_name, 'sec2.colocboost_res.pdf'), width = 10, height = 5)
replayPlot(cb$p)
dev.off()

# Display the summary table built by get_cb_summary() (colocalized variants).
cb_res_table

# Effect sign for each colocalization set.
get_effect_sign_csets(cb_res)

# LD between colocalization sets.
# NOTE(review): `gene.name` receives `gene_id` (the region_id of the target
# gene), not the gene symbol - presumably what the helper expects; confirm.
get_between_purity_simple(cb_res, gene.name = gene_id, path = '/data/colocalization/QTL_data/eQTL/')

# AD GWAS cohorts to overlay on the ColocBoost plot.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)

# Same ColocBoost plot as before, with the GWAS cohorts added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.Error : File '/data/GWAS/ADGWAS_sumstats/16_81293081-82644764.RSS_QC_RAISS_imputed.AD_Wightman_Full_2021.sumstats.tsv.gz' does not exist or is non-readable. getwd()=='/data/interactive_analysis/hs3163/GIT/xqtl-paper/AD_targets/PLCG2'
No pvalue cutoff. Extract all variants names.

# Replay the recorded ColocBoost + AD GWAS plot (`cb_ad$p`) into a PDF. The
# output directory is derived from `gene_name`, matching the directory created
# with dir.create() earlier, instead of a hard-coded gene symbol.
pdf(file.path('plots', gene_name, 'sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
replayPlot(cb_ad$p)
dev.off()

# MASH plots for the target gene. The gene symbol comes from `gene_name`, as
# in the other per-gene calls, rather than being hard-coded here.
mash_p <- mash_plot(gene_name = gene_name)

options(repr.plot.width = 10, repr.plot.height = 10)

# Objects are not auto-printed inside a loop, so each plot is printed
# explicitly.
for (mash_p_tmp in mash_p) {
  print(mash_p_tmp)
}

# TWAS result plot for the target gene.
plot_TWAS_res(gene_id = gene_id, gene_name = gene_name)

# Load the flattened multi-gene / multi-context table for the target gene.
# The export file name is built from `gene_name` (as the other per-gene file
# names in this notebook are) instead of hard-coding the symbol.
multigene_flat <- get_multigene_multicontext_flatten(
  paste0('Fungen_xQTL_allQTL.overlapped.gwas.export.', gene_name, '.rds'),
  sQTL = 'no_MSBB'
)
multigene_flat

# Split the comma-separated `sliding_windows` field of the target gene into a
# flat character vector, then display it.
sliding_windows <- as.character(
  unlist(strsplit(target_gene_info$gene_info$sliding_windows, ','))
)
sliding_windows

# Collect the multi-gene mvSuSiE results for every sliding window of the
# target gene. A result is plotted when the condition names of its fit include
# the target gene's region id; every result that could be read is kept in
# `mnm_gene`.
mnm_gene <- list()
for (window in sliding_windows) {
  # A window without a readable result file is reported and skipped rather
  # than aborting the loop.
  mnm_gene_tmp <- tryCatch(
    readRDS(paste0('/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.', window, '.mnm.rds')),
    error = function(e) {
      message('Could not load mnm result for sliding window ', window, ': ', conditionMessage(e))
      NULL
    }
  )
  if (is.null(mnm_gene_tmp)) {
    next
  }

  if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
    # Plotting stays best-effort, but a failure is reported instead of being
    # silently discarded.
    tryCatch({
      p <- mvsusieR::mvsusie_plot(mnm_gene_tmp$mvsusie_fitted, sentinel_only = FALSE, add_cs = TRUE)
      print(p)  # This ensures the plot is displayed in JupyterLab
    }, error = function(e) {
      message('mvsusie_plot failed for sliding window ', window, ': ', conditionMessage(e))
    })
  } else {
    message('There is mnm result for sliding window ',window,', but not include target gene ', gene_name, ' in CS')
  }

  # mnm_gene_tmp is non-NULL here, so this appends exactly one element.
  mnm_gene[[length(mnm_gene) + 1L]] <- mnm_gene_tmp
}

$pip_plot

$effect_plot

$z_plot
NULL

$effects
                        L1
ENSG00000186153 -0.3497859
ENSG00000153815 -0.3497859
ENSG00000197943 -0.3497859
ENSG00000140945 -0.3497859
ENSG00000140943 -0.3497859
ENSG00000103187 -0.3497859

# Large canvas for the faceted effect plot below.
options(repr.plot.width = 40, repr.plot.height = 40)

# Faceted plot (one row per cs_coverage_0.95 + study + region combination,
# free y scales) that layers:
#   * lines of `fun_plot` against `x` from `effect_of_interest`,
#   * points of `pip` against `pos` from `effect_of_interest`,
#   * an arrow from gene_start to gene_end at y = 1 with the gene name label,
#   * red points for the rows of `flatten_table` whose study contains "AD_"
#     and whose cs_coverage_0.95 is not 0.
# NOTE(review): `effect_of_interest` and `flatten_table` are not assigned in
# the visible code - confirm they exist before this cell runs.
# NOTE(review): the gene annotation layers read `tar_gene_info`, whereas the
# rest of the visible code uses `target_gene_info` - confirm this is not a typo.
# NOTE(review): theme() and ylab() are each added twice in the chain;
# presumably the later values take effect - confirm the intended sizes/label.
 ggplot() + theme_bw() + facet_grid(cs_coverage_0.95 + study + region ~ ., labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)), scale = "free_y") +

      theme(text = element_text(size = 20), strip.text.y = element_text(size = 25, angle = 0.5)) +
     # xlim(view_win) +
      ylab("Estimated effect") +
   #   geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
    #            aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  geom_line(data = effect_of_interest ,
                aes_string(y = "fun_plot", x = "x", col = "cs_coverage_0.95"), size = 2) +  
    geom_point(data = effect_of_interest ,
                aes_string(y = "pip", x = "pos", col = "cs_coverage_0.95"), size = 10) +
    theme(text = element_text(size = 40), strip.text.y = element_text(size = 15, angle = 0.5), 
            axis.text.x = element_text(size = 40), axis.title.x = element_text(size = 40)) +
      xlab("Position") +
      ylab("Estimated\neffect") +
      geom_segment(arrow = arrow(length = unit(1, "cm")), aes(x = gene_start, xend = gene_end, y = 1, yend = 1), size = 6,
                  data = tar_gene_info$gene_info, alpha = 0.3) +
      geom_text(aes(x = (gene_start + gene_end) / 2, y = 1 , label = gene_name), size = 10, 
              data = tar_gene_info$gene_info)+
        # The AD layer drops the three faceting columns (study, region,
        # cs_coverage_0.95) from its data before plotting.
        geom_point(aes(x = pos, y = pip  ) ,color = "red", data = flatten_table%>%filter( str_detect(study,"AD_") , cs_coverage_0.95 != 0  )%>%mutate(AD_study = study%>%str_replace_all("_","\n" ))%>%select(-study,-region,-cs_coverage_0.95) )

# Fine-mapping contexts saved for this gene (file produced in sec1).
finempping_contexts <- readRDS(paste0(gene_name, '_finemapping_contexts.rds'))

# Pass the contexts through get_norosmap_contexts() before plotting.
finempping_contexts <- get_norosmap_contexts(finempping_contexts)

# ColocBoost plot with the QTL contexts added.
cb_contexts <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  cohorts = finempping_contexts,
  gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# Scatter plot of the interaction q-value (y, limited to [0, 1]) for each
# variant in `PLCG2_int_res`.
# The x axis is mapped to `variant_id`, so it is labelled "Variant ID" (it was
# previously labelled "Gene Name"). The title typo "nalysis" is corrected.
# The `size` label was removed because no size aesthetic is mapped.
ggplot(PLCG2_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(
    title = "qvalue for PLCG2 csets in interaction association analysis",
    x = "Variant ID",
    y = "qvalue_interaction"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(color = "grey80"),
    legend.position = NULL,
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  ylim(0, 1)
# No `plot` argument is given, so ggsave() falls back to its default plot
# (ggplot2's last_plot()), i.e. the plot built just above.
ggsave('plots/PLCG2/sec11.interaction_association_PLCG2_lessPIP25.pdf', height = 5, width = 8)

# Display `vars_p`.
# NOTE(review): `vars_p` is not assigned anywhere in the visible code - confirm
# where it is created.
vars_p

# Display `apoe_p`.
# NOTE(review): `apoe_p` is likewise not assigned in the visible code - confirm
# where it is created.
apoe_p

options(repr.plot.width = 12, repr.plot.height = 6)

# Plot PIP per gene (point size also mapped to PIP) for the rows of
# `flat_var`, save it under plots/<gene_name>/ and display it. When `flat_var`
# is NULL, only a message is emitted.
# The output directory is built from `gene_name`, like the file name, instead
# of a hard-coded gene symbol. The `color` label was removed because no colour
# aesthetic is mapped.
if (!is.null(flat_var)) {
  p <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
    geom_point(alpha = 0.7) +
    labs(
      title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
      x = "Gene Name",
      y = "PIP",
      size = "PIP"
    ) +
    theme_minimal(base_size = 14) +
    theme(
      panel.background = element_blank(),
      panel.grid.major = element_line(color = "grey80"),
      legend.position = NULL,
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
  ggsave(
    file.path('plots', gene_name, paste0('sec12.trans_fine_mapping_', gene_name, '.pdf')),
    p,
    height = 5,
    width = 8
  )
  # Last expression of the branch, so the plot is shown as the cell output.
  p
} else {
  message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000197943	chr16	80739096	85080000	81739096	chr16:80260790-81293081,chr16:81293081-82644764,chr16:82644764-84411478,chr16:84411478-85419908	16_80260790-81293081,16_81293081-82644764,16_82644764-84411478,16_84411478-85419908	16_80260790-81293081,16_81293081_82644764,16_82644764_84411478,16_84411478_85419908	TADB_1178,TADB_1179	chr16_76190834_82020270,chr16_79826074_86094230	81739097	81962685	chr16:69114331-82020270,chr16:72943078-86094230,chr16:76190834-90338345	PLCG2

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
AC; AC_productive; AC_unproductive	1.0000000	1	0.9995644	8051	chr16:81842674:G:T	chr16:81842674:G:T	coloc_sets:Y7_Y11_Y12:CS1
Oli; DLPFC	0.9470132	8	0.3348871	8581; 8588; 8606; 8632; 8615; 8585; 8633; 8617	chr16:81895134:T:A; chr16:81895883:C:T; chr16:81896927:T:C; chr16:81899607:A:G; chr16:81897798:A:G; chr16:81895580:A:G; chr16:81899622:T:C; chr16:81898357:G:C	chr16:81897798:A:G	coloc_sets:Y3_Y6:CS2
DLPFC; PCC	1.0000000	1	0.9999995	7386	chr16:81774628:T:G	chr16:81774628:T:G	coloc_sets:Y6_Y8:MergeCS1

	variants	AC	AC_productive	AC_unproductive
	<chr>	<dbl>	<dbl>	<dbl>
chr16:81842674:G:T	chr16:81842674:G:T	41.48703	-10.92301	26.44815

	variants	Oli	DLPFC
	<chr>	<dbl>	<dbl>
chr16:81895134:T:A	chr16:81895134:T:A	6.630979	7.952749
chr16:81895883:C:T	chr16:81895883:C:T	6.720441	7.837459
chr16:81896927:T:C	chr16:81896927:T:C	6.571342	8.006144
chr16:81899607:A:G	chr16:81899607:A:G	6.437596	8.034977
chr16:81897798:A:G	chr16:81897798:A:G	6.396584	8.038195
chr16:81895580:A:G	chr16:81895580:A:G	6.348199	8.009052
chr16:81899622:T:C	chr16:81899622:T:C	6.462049	7.947104
chr16:81898357:G:C	chr16:81898357:G:C	6.396584	7.985373

	variants	DLPFC	PCC
	<chr>	<dbl>	<dbl>
chr16:81774628:T:G	chr16:81774628:T:G	7.641081	6.713851

Case study: PLCG2 xQTL and AD GWAS¶

Overview¶

Computing environment setup¶

How to Use This Notebook¶

Section 0: Sanity check ¶

Check the basic information of the gene¶

Check the existing results which are inputs to this analysis¶

Section 1: Fine-mapping for xQTL and GWAS ¶

Section 2: Multi-context colocalization with Bellenguez 2022 ¶

Section 3: Refinement of colocalized loci with other AD GWAS ¶

Section 4: Assessment of multi-context xQTL effect sizes ¶

Option 1: ColocBoost + MASH¶

Option 2: mvSuSiE¶

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)¶

TWAS results¶

MR results¶

cTWAS results¶

Section 6: Context specific multi-gene fine-mapping ¶

A quick analysis: using the xQTL-AD summary table (flatten table)¶

A statistically solid approach: mvSuSiE multi-gene analysis¶

Section 7: Epigenomic QTL and their target regions ¶

Generate a crude plot to determine whether the story is interesting¶

Section 8: Context focused validation in other xQTL data ¶

Section 9: Non-linear effects of xQTL ¶

APOE interaction¶

Section 10: in silico functional studies in iPSC model ¶

Section 11: Functional annotations of selected loci ¶

Section 12: Candidate loci as trans-xQTL ¶

Creative thinking: generate hypothesis, search in literature, raise questions to discuss¶

coloc_csets_1	coloc_csets_2	min_abs_cor	max_abs_cor	median_abs_cor
coloc_sets:Y7_Y11_Y12:CS1	coloc_sets:Y3_Y6:CS2	0.0796129906298416	0.0944176693359274	0.0878608322517201
coloc_sets:Y7_Y11_Y12:CS1	coloc_sets:Y6_Y8:MergeCS1	0.0477331123436312	0.0477331123436312	0.0477331123436312
coloc_sets:Y3_Y6:CS2	coloc_sets:Y6_Y8:MergeCS1	0.000258086520375415	0.0164707029292386	0.00289978341439768

A data.frame: 36 x 6
gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000064270	chr16	84368526	84368527	ATP2C2	MiGA_SVZ_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000075399	chr16	89720897	89720898	VPS9D1	ROSMAP_AC_sQTL
ENSG00000086696	chr16	82035003	82035004	HSD17B2	MiGA_SVZ_eQTL
ENSG00000103121	chr16	81020269	81020270	CMC2	MiGA_THA_eQTL
ENSG00000103150	chr16	83899114	83899115	MLYCD	MiGA_SVZ_eQTL,MiGA_THA_eQTL,DLPFC_Bennett_pQTL
ENSG00000103154	chr16	83968243	83968244	NECAB2	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000103160	chr16	84145176	84145177	HSDL1	MiGA_GTS_eQTL,MiGA_SVZ_eQTL,PCC_DeJager_eQTL
ENSG00000103168	chr16	84187069	84187070	TAF1C	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000103175	chr16	84294845	84294846	WFDC1	MiGA_SVZ_eQTL,OPC_Kellis_eQTL,OPC_mega_eQTL
ENSG00000103187	chr16	84618077	84618078	COTL1	MiGA_SVZ_eQTL
ENSG00000103194	chr16	84699985	84699986	USP10	ROSMAP_PCC_sQTL,STARNET_eQTL
ENSG00000103196	chr16	84819984	84819985	CRISPLD2	ROSMAP_DLPFC_sQTL
ENSG00000135686	chr16	84648510	84648511	KLHL36	MiGA_SVZ_eQTL,BM_10_MSBB_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL,STARNET_eQTL
ENSG00000135697	chr16	81238688	81238689	BCO1	BM_44_MSBB_eQTL
ENSG00000135698	chr16	82170223	82170224	MPHOSPH6	MiGA_SVZ_eQTL,MiGA_THA_eQTL,Inh_mega_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000135709	chr16	85027750	85027751	KIAA0513	ROSMAP_PCC_sQTL
ENSG00000140943	chr16	84116941	84116942	MBTPS1	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000140945	chr16	82626964	82626965	CDH13	MiGA_GTS_eQTL,MiGA_SVZ_eQTL,BM_44_MSBB_eQTL,Oli_DeJager_eQTL,Oli_Kellis_eQTL,DLPFC_Klein_gpQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL,STARNET_eQTL
ENSG00000140948	chr16	87493023	87493024	ZCCHC14	ROSMAP_PCC_sQTL
ENSG00000140950	chr16	84554032	84554033	MEAK7	MiGA_GFM_eQTL,DLPFC_DeJager_eQTL,AC_DeJager_eQTL,monocyte_ROSMAP_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000140955	chr16	84191137	84191138	ADAD2	Knight_eQTL
ENSG00000140961	chr16	83931310	83931311	OSGIN1	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000141012	chr16	88856969	88856970	GALNS	ROSMAP_DLPFC_sQTL
ENSG00000153786	chr16	85011534	85011535	ZDHHC7	Ast_mega_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000153815	chr16	81444807	81444808	CMIP	MiGA_GTS_eQTL,MiGA_SVZ_eQTL,monocyte_ROSMAP_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000154099	chr16	84145307	84145308	DNAAF1	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000166454	chr16	81035841	81035842	ATMIN	MiGA_SVZ_eQTL
ENSG00000166558	chr16	84042794	84042795	SLC38A8	BM_36_MSBB_eQTL
ENSG00000167508	chr16	88663160	88663161	MVD	ROSMAP_AC_sQTL
ENSG00000167523	chr16	89657739	89657740	SPATA33	ROSMAP_DLPFC_sQTL
ENSG00000184860	chr16	82011480	82011481	SDR42E1	MiGA_GFM_eQTL
ENSG00000205078	chr16	77199407	77199408	SYCE1L	MiGA_SVZ_eQTL
ENSG00000230989	chr16	83807977	83807978	HSBP1	MiGA_GTS_eQTL,Exc_mega_eQTL,STARNET_eQTL
ENSG00000260643	chr16	81096295	81096296	AC092718.2	MiGA_SVZ_eQTL
ENSG00000261609	chr16	81314943	81314944	GAN	MiGA_GTS_eQTL,MiGA_SVZ_eQTL,AC_DeJager_eQTL,Exc_mega_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000284512	chr16	81096283	81096284	AC092718.7	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL