# Environment setup (shell command — run it in a terminal, not in the R kernel):
# installs the `r-pecotmr` package into the micromamba environment named `r_libs`.
micromamba install -n r_libs r-pecotmr

# If an error occurs while sourcing these scripts, it may be because a get() call returned NULL.
# In that case, restart the kernel (or click the R kernel indicator in the upper-right corner) to resolve the issue.
# Load the project's plotting and utility helpers.
source("../../codes/cb_plot.R")
source("../../codes/utilis.R")

# Source every R script from the local colocboost checkout.
# `pattern` is a regular expression: the previous value ".R" matched any file
# whose name contains an arbitrary character followed by "R" (for example
# "notes.Rmd" or "cache.RData"), so it is anchored to a literal ".R" extension.
colocboost_files <- list.files(
  "/data/colocalization/colocboost/R",
  pattern = "\\.R$",
  full.names = TRUE
)
for (file in colocboost_files) {
  source(file)
}
# Gene under study; the output directory and the metadata lookup derive from it.
gene_name <- "PPP5C"

# Create the per-gene figure directory only when it is missing, so re-running
# the notebook does not emit an "already exists" warning while genuine
# creation failures are still reported.
plot_dir <- file.path("plots", gene_name)
if (!dir.exists(plot_dir)) {
  dir.create(plot_dir, recursive = TRUE)
}

# Look up the gene's metadata and display it. The identical lookup and print
# used to be executed twice in a row; once is enough.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Identifier and chromosome of the gene, taken from its `gene_info` table.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# NOTE(review): the helpers are sourced a second time here, as in the original
# notebook — presumably a workaround for stale definitions; confirm it is
# still required.
source("../../codes/utilis.R")
expression_in_rosmap_bulk(target_gene_info)

# Display the `region_p` object.
# NOTE(review): `region_p` is not defined in the visible code — presumably it
# is created by a helper or an earlier step; confirm where it comes from.
region_p

# Display the `pip_p` object (also defined outside the visible code — TODO
# confirm its origin).
pip_p

# Load the ColocBoost result for this gene. The same file used to be read
# twice in a row; a single read is sufficient.
cb_res <- readRDS(
  paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds")
)

# Build the colocalization plot; `cb$p` is the recorded plot replayed below.
cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Write the figure into the gene's own plot directory. The directory name is
# derived from `gene_name` rather than hard-coded, so it stays in sync with the
# directory created at the top of the notebook.
pdf(
  file.path("plots", gene_name, "sec2.colocboost_res.pdf"),
  width = 10,
  height = 5
)
replayPlot(cb$p)
dev.off()

# Table of colocalized variants.
# NOTE(review): `cb_res_table` is defined outside the visible code.
cb_res_table

# Effect sign of the variants in each colocalization set.
get_effect_sign_csets(cb_res)

# LD between colocalization sets.
get_between_purity_simple(
  cb_res,
  gene.name = gene_id,
  path = "/data/colocalization/QTL_data/eQTL/"
)

# AD GWAS cohorts passed to the colocalization plot.
AD_cohorts <- c(
  "AD_Jansen_2021",
  "AD_Bellenguez_EADB_2022",
  "AD_Bellenguez_EADI_2022",
  "AD_Kunkle_Stage1_2019",
  "AD_Wightman_Excluding23andMe_2021",
  "AD_Wightman_ExcludingUKBand23andME_2021",
  "AD_Wightman_Full_2021"
)

# Same ColocBoost plot as before, now with the GWAS cohorts added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Save the AD-cohort colocalization plot. The directory is derived from
# `gene_name` instead of a hard-coded "PPP5C", matching the directory created
# at the top of the notebook.
pdf(
  file.path("plots", gene_name, "sec3.colocboost_res_allad.pdf"),
  width = 10,
  height = 5
)
replayPlot(cb_ad$p)
dev.off()

# Build the MASH plots for the gene under study. The gene is taken from
# `gene_name` rather than repeated as a literal, so changing the gene at the
# top of the notebook is enough.
mash_p <- mash_plot(gene_name = gene_name)

# Square display area for the notebook output.
options(repr.plot.width = 10, repr.plot.height = 10)

# `mash_plot()` returns a collection of plots; print each one explicitly
# because auto-printing does not happen inside a loop.
for (mash_p_tmp in mash_p) {
  print(mash_p_tmp)
}

# TWAS results for the gene.
plot_TWAS_res(gene_id = gene_id, gene_name = gene_name)

# Flattened multi-gene / multi-context table read from the gene's export file.
# The file name is built from `gene_name` instead of hard-coding "PPP5C".
multigene_flat <- get_multigene_multicontext_flatten(
  paste0("Fungen_xQTL_allQTL.overlapped.gwas.export.", gene_name, ".rds"),
  sQTL = "no_MSBB"
)
multigene_flat

# The gene's sliding windows are stored as one comma-separated string; split
# them into a character vector with one window per element.
sliding_windows <- as.character(
  unlist(strsplit(target_gene_info$gene_info$sliding_windows, ",", fixed = TRUE))
)
sliding_windows

# Collect the multi-gene mvSuSiE result of every sliding window that has one,
# plotting the fit whenever the target gene is among its conditions.
mnm_gene <- list()
for (window in sliding_windows) {
  mnm_path <- paste0(
    "/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.",
    window, ".mnm.rds"
  )

  # A window without a readable result file is skipped (best effort), but the
  # reason is reported instead of being swallowed silently.
  mnm_gene_tmp <- tryCatch(
    readRDS(mnm_path),
    error = function(e) {
      message(
        "Skipping sliding window ", window, ": ", conditionMessage(e)
      )
      NULL
    }
  )
  if (is.null(mnm_gene_tmp)) {
    next
  }

  condition_names <- mnm_gene_tmp$mvsusie_fitted$condition_names
  if (target_gene_info$gene_info$region_id %in% condition_names) {
    tryCatch(
      {
        p <- mvsusieR::mvsusie_plot(
          mnm_gene_tmp$mvsusie_fitted,
          sentinel_only = FALSE,
          add_cs = TRUE
        )
        print(p) # This ensures the plot is displayed in JupyterLab
      },
      # A failed plot must not abort the loop, but say why it failed.
      error = function(e) {
        message(
          "Could not plot mvSuSiE result for sliding window ", window, ": ",
          conditionMessage(e)
        )
        NULL
      }
    )
  } else {
    message('There is mnm result for sliding window ',window,', but not include target gene ', gene_name, ' in CS')
  }

  # Keep every successfully loaded result, plotted or not.
  mnm_gene[[length(mnm_gene) + 1L]] <- mnm_gene_tmp
}

# Large display area: the figure stacks one facet row per
# credible set / study / region combination.
options(repr.plot.width = 40, repr.plot.height = 40)

# AD GWAS rows that belong to a credible set. The faceting columns are dropped
# from this layer's data — presumably so the red points are drawn in every
# facet panel; confirm against the intended figure.
ad_variants <- flatten_table %>%
  filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
  mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
  select(-study, -region, -cs_coverage_0.95)

# Changes relative to the previous version (the rendered figure is unchanged):
#   * `scales = "free_y"` is spelled out; `scale =` only worked through
#     partial argument matching.
#   * `aes()` with bare column names replaces the deprecated `aes_string()`.
#   * the first `theme()` / `ylab()` calls were dropped because the later
#     calls overrode every value they set.
# NOTE(review): `tar_gene_info` is not defined in the visible code (the object
# created earlier is `target_gene_info`); confirm it is not a stale name.
ggplot() +
  theme_bw() +
  facet_grid(
    cs_coverage_0.95 + study + region ~ .,
    labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
    scales = "free_y"
  ) +
  # xlim(view_win) +
  # geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
  #           aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  geom_line(
    data = effect_of_interest,
    aes(y = fun_plot, x = x, col = cs_coverage_0.95),
    size = 2
  ) +
  geom_point(
    data = effect_of_interest,
    aes(y = pip, x = pos, col = cs_coverage_0.95),
    size = 10
  ) +
  theme(
    text = element_text(size = 40),
    strip.text.y = element_text(size = 15, angle = 0.5),
    axis.text.x = element_text(size = 40),
    axis.title.x = element_text(size = 40)
  ) +
  xlab("Position") +
  ylab("Estimated\neffect") +
  # Gene body drawn as an arrow at y = 1, with the gene name at its midpoint.
  geom_segment(
    arrow = arrow(length = unit(1, "cm")),
    aes(x = gene_start, xend = gene_end, y = 1, yend = 1),
    size = 6,
    data = tar_gene_info$gene_info,
    alpha = 0.3
  ) +
  geom_text(
    aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name),
    size = 10,
    data = tar_gene_info$gene_info
  ) +
  geom_point(aes(x = pos, y = pip), color = "red", data = ad_variants)

# Fine-mapping contexts saved by section 1. The file used to be read twice in
# a row; a single read is sufficient.
finempping_contexts <- readRDS(paste0(gene_name, "_finemapping_contexts.rds")) # from sec1

# Reduce the contexts with `get_norosmap_contexts()` before plotting.
finempping_contexts <- get_norosmap_contexts(finempping_contexts)

# ColocBoost plot with the remaining QTL contexts added.
cb_contexts <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  cohorts = finempping_contexts,
  gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# q-values of the interaction association analysis, one point per variant.
# Fixes relative to the previous version:
#   * title typo "nalysis" -> "analysis";
#   * the x axis shows `variant_id`, so it is no longer labelled "Gene Name";
#   * the `size` label (no size aesthetic is mapped) and
#     `legend.position = NULL` (a no-op) were removed;
#   * the plot is stored and passed to `ggsave()` explicitly instead of
#     relying on the implicit last plot.
interaction_p <- ggplot(
  PPP5C_int_res,
  aes(x = variant_id, y = qvalue_interaction)
) +
  geom_point(alpha = 0.7, size = 6) +
  labs(
    title = "qvalue for PPP5C csets in interaction association analysis",
    x = "Variant ID",
    y = "qvalue_interaction"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(color = "grey80"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  ylim(0, 1)
# scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

# Show the figure in the notebook, then write it to disk.
interaction_p
ggsave(
  "plots/PPP5C/sec11.interaction_association_PPP5C_lessPIP25.pdf",
  plot = interaction_p,
  height = 5,
  width = 8
)

# Display the `vars_p` object.
# NOTE(review): `vars_p` is not defined in the visible code — confirm which
# step creates it.
vars_p

# Display the `apoe_p` object (also defined outside the visible code — TODO
# confirm its origin).
apoe_p

options(repr.plot.width = 12, repr.plot.height = 6)

# PIP of trans fine-mapped genes, one point per row of `flat_var`.
# Changes relative to the previous version:
#   * an empty `flat_var` is treated like a missing one, instead of saving a
#     blank figure;
#   * the output directory is derived from `gene_name` (it used to hard-code
#     "PPP5C" while the file name already used `gene_name`);
#   * the `color` label (no colour aesthetic is mapped) and
#     `legend.position = NULL` (a no-op) were removed.
if (!is.null(flat_var) && NROW(flat_var) > 0) {
  p <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
    geom_point(alpha = 0.7) +
    labs(
      title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
      x = "Gene Name",
      y = "PIP",
      size = "PIP"
    ) +
    theme_minimal(base_size = 14) +
    theme(
      panel.background = element_blank(),
      panel.grid.major = element_line(color = "grey80"),
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))
  ggsave(
    file.path("plots", gene_name, paste0("sec12.trans_fine_mapping_", gene_name, ".pdf")),
    plot = p,
    height = 5,
    width = 8
  )
  # Last expression of the branch, so the plot is also shown in the notebook.
  p
} else {
  message("There are no detectable trans signals for ", gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000011485	chr19	44680000	47960000	46347086	chr19:42346101-44935906,chr19:44935906-46842901,chr19:46842901-48590136	19_42346101-44935906,19_44935906-46842901,19_46842901-48590136	19_42346101-44935906,19_44935906_46842901,19_46842901_48590136	TADB_1261,TADB_1262,TADB_1263	chr19_40837074_46645602,chr19_43631573_48886315,chr19_46290022_55473296	46347087	46392981	chr19:31719752-46645602,chr19:34641744-48886315,chr19:40837074-55473296,chr19:43631573-57160893,chr19:46290022-58617616	PPP5C

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000011485	chr19	44680000	47960000	46347086	chr19:42346101-44935906,chr19:44935906-46842901,chr19:46842901-48590136	19_42346101-44935906,19_44935906-46842901,19_46842901-48590136	19_42346101-44935906,19_44935906_46842901,19_46842901_48590136	TADB_1261,TADB_1262,TADB_1263	chr19_40837074_46645602,chr19_43631573_48886315,chr19_46290022_55473296	46347087	46392981	chr19:31719752-46645602,chr19:34641744-48886315,chr19:40837074-55473296,chr19:43631573-57160893,chr19:46290022-58617616	PPP5C

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
Ast; DLPFC; AC; PCC	1	13	0.07631761	9161; 9168; 9169; 9176; 9187; 9195; 9217; 9221; 9227; 9231; 9268; 9085; 9104	chr19:46363525:T:G; chr19:46365473:C:G; chr19:46365703:A:G; chr19:46368010:G:T; chr19:46370708:A:G; chr19:46372136:T:C; chr19:46379203:A:G; chr19:46380837:C:T; chr19:46382675:A:G; chr19:46383750:G:C; chr19:46390208:A:G; chr19:46346219:A:G; chr19:46351675:A:G	chr19:46346219:A:G	coloc_sets:Y2_Y7_Y8_Y9:CS1

	variants	Ast	DLPFC	AC	PCC
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>
chr19:46363525:T:G	chr19:46363525:T:G	-6.142655	-4.618099	-4.884360	-4.216317
chr19:46365473:C:G	chr19:46365473:C:G	-6.142655	-4.618099	-4.884360	-4.216317
chr19:46365703:A:G	chr19:46365703:A:G	-6.142655	-4.618099	-4.884360	-4.216317
chr19:46368010:G:T	chr19:46368010:G:T	-6.142655	-4.618099	-4.884360	-4.216317
chr19:46370708:A:G	chr19:46370708:A:G	-6.142655	-4.618099	-4.884360	-4.216317
chr19:46372136:T:C	chr19:46372136:T:C	-6.142655	-4.618099	-4.884360	-4.216317
chr19:46379203:A:G	chr19:46379203:A:G	-6.142655	-4.618099	-4.884360	-4.216317
chr19:46380837:C:T	chr19:46380837:C:T	-6.142655	-4.618099	-4.884360	-4.216317
chr19:46382675:A:G	chr19:46382675:A:G	-6.142655	-4.618099	-4.884360	-4.216317
chr19:46383750:G:C	chr19:46383750:G:C	-6.142655	-4.618099	-4.884360	-4.216317
chr19:46390208:A:G	chr19:46390208:A:G	-6.142655	-4.618099	-4.884360	-4.216317
chr19:46346219:A:G	chr19:46346219:A:G	-6.142655	-4.702220	-4.792844	-4.216317
chr19:46351675:A:G	chr19:46351675:A:G	-6.142655	-4.620000	-4.690080	-4.216317

gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000007047	chr19	45079287	45079288	MARK4	MiGA_GTS_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000010310	chr19	45668220	45668221	GIPR	MiGA_SVZ_eQTL,MiGA_THA_eQTL,BM_10_MSBB_eQTL,DLPFC_DeJager_eQTL,Oli_Kellis_eQTL,Oli_mega_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000011478	chr19	45692402	45692403	QPCTL	MiGA_SVZ_eQTL
ENSG00000012061	chr19	45478827	45478828	ERCC1	MiGA_THA_eQTL,DLPFC_DeJager_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000039650	chr19	49867908	49867909	PNKP	ROSMAP_DLPFC_sQTL
ENSG00000062822	chr19	50384203	50384204	POLD1	ROSMAP_PCC_sQTL
ENSG00000063127	chr19	49325214	49325215	SLC6A16	ROSMAP_DLPFC_sQTL
ENSG00000069399	chr19	44747835	44747836	BCL3	ROSMAP_AC_sQTL,STARNET_eQTL
ENSG00000073050	chr19	43580472	43580473	XRCC1	ROSMAP_PCC_sQTL
ENSG00000074219	chr19	49362456	49362457	TEAD2	ROSMAP_DLPFC_sQTL
ENSG00000079432	chr19	42268536	42268537	CIC	ROSMAP_AC_sQTL
ENSG00000090372	chr19	46746993	46746994	STRN4	monocyte_ROSMAP_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL,STARNET_eQTL
ENSG00000090554	chr19	49474206	49474207	FLT3LG	ROSMAP_AC_sQTL
ENSG00000104783	chr19	43781256	43781257	KCNN4	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000104812	chr19	48993309	48993310	GYS1	MiGA_SVZ_eQTL
ENSG00000104853	chr19	44954590	44954591	CLPTM1	Knight_eQTL,PCC_DeJager_eQTL,DLPFC_Klein_gpQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000104859	chr19	45039044	45039045	CLASRP	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000104866	chr19	45091395	45091396	PPP1R37	STARNET_eQTL
ENSG00000104879	chr19	45322874	45322875	CKM	BM_10_MSBB_eQTL
ENSG00000104881	chr19	45406348	45406349	PPP1R13L	MiGA_SVZ_eQTL,ROSMAP_PCC_sQTL
ENSG00000104884	chr19	45370917	45370918	ERCC2	MiGA_GTS_eQTL,MiGA_THA_eQTL
ENSG00000104936	chr19	45782551	45782552	DMPK	MiGA_GFM_eQTL,MiGA_THA_eQTL,Ast_DeJager_eQTL,Exc_Kellis_eQTL,Inh_mega_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000104941	chr19	45815307	45815308	RSPH6A	DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL
ENSG00000104951	chr19	49929538	49929539	IL4I1	ROSMAP_AC_sQTL
ENSG00000104967	chr19	45974043	45974044	NOVA2	MiGA_SVZ_eQTL
ENSG00000104983	chr19	45995460	45995461	CCDC61	ROSMAP_DLPFC_sQTL
ENSG00000105281	chr19	46788593	46788594	SLC1A5	MiGA_GTS_eQTL,MiGA_THA_eQTL,monocyte_ROSMAP_eQTL,STARNET_eQTL
ENSG00000105287	chr19	46717126	46717127	PRKD2	MiGA_SVZ_eQTL,MiGA_THA_eQTL,Exc_DeJager_eQTL,OPC_Kellis_eQTL,Exc_Kellis_eQTL,Exc_mega_eQTL,ROSMAP_AC_sQTL
ENSG00000105321	chr19	47255979	47255980	CCDC9	MiGA_GTS_eQTL,MiGA_THA_eQTL,MSBB_BM36_pQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000105357	chr19	50188185	50188186	MYH14	ROSMAP_PCC_sQTL
...	...	...	...	...	...
ENSG00000177464	chr19	45602211	45602212	OPA3	DLPFC_Bennett_pQTL
ENSG00000177464	chr19	45602211	45602212	GPR4	DLPFC_Bennett_pQTL
ENSG00000178980	chr19	47778584	47778585	SELENOW	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000181027	chr19	46746045	46746046	FKRP	MiGA_SVZ_eQTL,MiGA_THA_eQTL,BM_44_MSBB_eQTL,Ast_DeJager_eQTL,AC_DeJager_eQTL,Oli_Kellis_eQTL,Ast_mega_eQTL,Exc_mega_eQTL,Oli_mega_eQTL,STARNET_eQTL
ENSG00000182013	chr19	46471562	46471563	PNMA8A	Oli_DeJager_eQTL,Inh_DeJager_eQTL,AC_DeJager_eQTL
ENSG00000182264	chr19	48746908	48746909	IZUMO1	MiGA_SVZ_eQTL
ENSG00000186567	chr19	44662277	44662278	CEACAM19	MiGA_SVZ_eQTL
ENSG00000187244	chr19	44809070	44809071	BCAM	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000188624	chr19	46124687	46124688	IGFL3	MiGA_THA_eQTL
ENSG00000189114	chr19	45178783	45178784	BLOC1S3	PCC_DeJager_eQTL,AC_DeJager_eQTL
ENSG00000189190	chr19	52786806	52786807	ZNF600	MiGA_THA_eQTL
ENSG00000197380	chr19	46661181	46661182	DACT3	ROSMAP_PCC_sQTL
ENSG00000197405	chr19	47290022	47290023	C5AR1	MiGA_SVZ_eQTL
ENSG00000203326	chr19	53365703	53365704	ZNF525	Knight_eQTL,BM_36_MSBB_eQTL
ENSG00000204673	chr19	49878458	49878459	AKT1S1	DLPFC_Bennett_pQTL
ENSG00000204869	chr19	46077093	46077094	IGFL4	Inh_DeJager_eQTL,Exc_Kellis_eQTL,Inh_Kellis_eQTL,Exc_mega_eQTL,Inh_mega_eQTL
ENSG00000204941	chr19	43186535	43186536	PSG5	ROSMAP_DLPFC_sQTL
ENSG00000213889	chr19	45488776	45488777	PPM1N	MiGA_THA_eQTL
ENSG00000216588	chr19	44613562	44613563	IGSF23	MiGA_THA_eQTL
ENSG00000221923	chr19	52369916	52369917	ZNF880	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000224916	chr19	44942237	44942238	APOC4-APOC2	MiGA_GFM_eQTL,STARNET_eQTL
ENSG00000234906	chr19	44946034	44946035	APOC2	STARNET_eQTL
ENSG00000256683	chr19	51986855	51986856	ZNF350	ROSMAP_PCC_sQTL
ENSG00000267467	chr19	44942236	44942237	APOC4	STARNET_eQTL
ENSG00000267680	chr19	44094338	44094339	ZNF224	ROSMAP_AC_sQTL
ENSG00000268500	chr19	51646888	51646889	AC018755.2	STARNET_eQTL
ENSG00000269403	chr19	51368098	51368099	AC008750.7	ROSMAP_DLPFC_sQTL
ENSG00000269469	chr19	49462751	49462752	AC010619.1	ROSMAP_AC_sQTL
ENSG00000277531	chr19	46428950	46428951	PNMA8C	BM_36_MSBB_eQTL
ENSG00000285505	chr19	41994231	41994232	AC010616.1	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL

Case study: PPP5C xQTL and AD GWAS

Overview

Computing environment setup

How to Use This Notebook

Section 0: Sanity check

Check the basic information of the gene

Check the existing results which are inputs to this analysis

Check the existing results which are inputs to this analysis

Section 1: Fine-mapping for xQTL and GWAS

Section 2: Multi-context colocalization with Bellenguez 2022

Section 3: Refinement of colocalized loci with other AD GWAS

Section 4: Assessment of multi-context xQTL effect sizes

Option 1: ColocBoost + MASH

Option 2: mvSuSiE

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)

TWAS results

MR results

cTWAS results

Section 6: Context specific multi-gene fine-mapping

A quick analysis: using the xQTL-AD summary table (flatten table)

A statistically solid approach: mvSuSiE multi-gene analysis

Section 7: Epigenomic QTL and their target regions

Generate a crude plot to determine whether the story is interesting

Section 8: Context focused validation in other xQTL data

Section 9: Non-linear effects of xQTL

APOE interaction

Section 10: in silico functional studies in iPSC model

Section 11: Functional annotations of selected loci

Section 12: Candidate loci as trans-xQTL

Creative thinking: generate hypothesis, search in literature, raise questions to discuss