micromamba install -n r_libs r-pecotmr

# Load the project helper scripts.
# If an error occurs while sourcing, it might be because a get() call returned
# NULL. Restart the kernel (or click the R kernel in the upper right corner)
# to resolve the issue.
source('../../codes/cb_plot.R')
source('../../codes/utilis.R')

# Source every ColocBoost R script. The pattern is anchored ("\\.R$") so that
# only files ending in ".R" are picked up; the previous unanchored ".R" regex
# matched any character followed by "R" (e.g. "x.RData", "notes.Rmd").
colocboost_scripts <- list.files(
  "/data/colocalization/colocboost/R",
  pattern = "\\.R$",
  full.names = TRUE
)
for (file in colocboost_scripts) {
  source(file)
}
# Gene symbol used by the rest of this notebook.
gene_name <- "PRSS36"

# Per-gene output directory for figures. Only create it when it is missing so
# that re-running the notebook does not warn about an existing directory,
# while genuine creation failures still surface.
plot_dir <- file.path("plots", gene_name)
if (!dir.exists(plot_dir)) {
  dir.create(plot_dir, recursive = TRUE)
}

# Fetch the record for the target gene once and display it (the lookup and
# print were previously repeated verbatim).
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Identifiers reused below: the region_id and `#chr` columns of the gene record.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# Re-source the helper script (it is also sourced once near the top of this notebook).
source('../../codes/utilis.R')
# Run the expression helper on the target gene record.
# NOTE(review): presumably plots expression in ROSMAP bulk data — confirm in utilis.R.
expression_in_rosmap_bulk(target_gene_info)

# Display region_p.
# NOTE(review): region_p is not assigned anywhere in the visible code — confirm where it is created.
region_p

# Display pip_p.
# NOTE(review): pip_p is not assigned anywhere in the visible code — confirm where it is created.
pip_p

# Load the ColocBoost result for the target gene (read once; the identical
# readRDS call was previously repeated).
cb_res <- readRDS(
  paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds")
)

# Draw the ColocBoost figure; the recorded plot is kept in `cb$p`.
cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Replay the recorded plot into a PDF. The directory is derived from gene_name
# instead of a hard-coded 'PRSS36' so it follows the gene set at the top.
pdf(
  file.path("plots", gene_name, "sec2.colocboost_res.pdf"),
  width = 10, height = 5
)
replayPlot(cb$p)
dev.off()

# Display the table of colocalized variants.
# NOTE(review): cb_res_table is not assigned anywhere in the visible code — confirm where it is created.
cb_res_table

# Effect sign for each colocalization set, computed from the ColocBoost result.
get_effect_sign_csets(cb_res)

# LD between colocalization sets; the helper is given the gene id and the eQTL data directory.
get_between_purity_simple(cb_res, gene.name = gene_id, path = '/data/colocalization/QTL_data/eQTL/')

# AD GWAS studies to add to the ColocBoost plot.
AD_cohorts <- c(
  "AD_Jansen_2021",
  "AD_Bellenguez_EADB_2022",
  "AD_Bellenguez_EADI_2022",
  "AD_Kunkle_Stage1_2019",
  "AD_Wightman_Excluding23andMe_2021",
  "AD_Wightman_ExcludingUKBand23andME_2021",
  "AD_Wightman_Full_2021"
)

# Same plot call as for the base ColocBoost figure, with the GWAS cohorts added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.Error : File '/data/GWAS/ADGWAS_sumstats/16_29685831-46381513.RSS_QC_RAISS_imputed.AD_Kunkle_Stage1_2019.sumstats.tsv.gz' does not exist or is non-readable. getwd()=='/data/interactive_analysis/hs3163/GIT/xqtl-paper/AD_targets/PRSS36'
No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Replay the recorded AD-cohort plot into a PDF. The directory is derived from
# gene_name instead of a hard-coded 'PRSS36'.
pdf(
  file.path("plots", gene_name, "sec3.colocboost_res_allad.pdf"),
  width = 10, height = 5
)
replayPlot(cb_ad$p)
dev.off()

# Build the MASH plots for the gene set at the top of the notebook (the gene
# symbol was previously hard-coded as 'PRSS36' here).
mash_p <- mash_plot(gene_name = gene_name)

# Notebook display size for the plots printed below.
options(repr.plot.width = 10, repr.plot.height = 10)

# Print each element of mash_p in turn so every plot is displayed.
for (mash_p_tmp in mash_p) {
  print(mash_p_tmp)
}

# TWAS results for the target gene.
plot_TWAS_res(gene_id = gene_id, gene_name = gene_name)

# Flattened multi-gene / multi-context table. The export file name is built
# from gene_name instead of hard-coding 'PRSS36'.
multigene_flat <- get_multigene_multicontext_flatten(
  paste0("Fungen_xQTL_allQTL.overlapped.gwas.export.", gene_name, ".rds"),
  sQTL = "no_MSBB"
)
multigene_flat

# The gene record stores its sliding windows as one comma-separated string;
# split it into a character vector with one window per element.
sliding_windows <- target_gene_info$gene_info$sliding_windows %>%
  strsplit(",") %>%
  unlist() %>%
  as.character()
sliding_windows

# Collect the multi-gene mvSuSiE result of every sliding window that has one,
# and plot the fit whenever the target gene is among its condition names.
# Failures to read or plot are reported with message() and then skipped, so a
# missing or broken window no longer disappears silently.
mnm_gene <- list()
for (window in sliding_windows) {
  mnm_path <- paste0(
    "/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.",
    window, ".mnm.rds"
  )
  mnm_gene_tmp <- tryCatch(
    readRDS(mnm_path),
    error = function(e) {
      message("Skipping sliding window ", window, ": ", conditionMessage(e))
      NULL
    }
  )

  # Nothing usable for this window: move on to the next one.
  if (is.null(mnm_gene_tmp)) {
    next
  }

  if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
    tryCatch({
      p <- mvsusieR::mvsusie_plot(mnm_gene_tmp$mvsusie_fitted, sentinel_only = FALSE, add_cs = TRUE)
      print(p)  # This ensures the plot is displayed in JupyterLab
    }, error = function(e) {
      message("Could not plot mvSuSiE fit for sliding window ", window, ": ", conditionMessage(e))
    })
  } else {
    message('There is mnm result for sliding window ',window,', but not include target gene ', gene_name, ' in CS')
  }

  # Keep the result even when the target gene is absent from it.
  mnm_gene[[length(mnm_gene) + 1L]] <- mnm_gene_tmp
}

options(repr.plot.width = 40, repr.plot.height = 40)

# Crude stacked-panel plot: one facet row per cs_coverage_0.95 / study / region
# combination, with the effect curve (fun_plot vs x) and PIP points (pip vs pos)
# from effect_of_interest, a gene arrow and gene label at y = 1, and red points
# for the "AD_" rows of flatten_table whose cs_coverage_0.95 is non-zero.
# NOTE(review): effect_of_interest, flatten_table and tar_gene_info are not
# created in the visible part of this notebook (the gene record loaded earlier
# is named target_gene_info) — confirm they exist before running this cell.
ggplot() +
  theme_bw() +
  facet_grid(
    cs_coverage_0.95 + study + region ~ .,
    # Replace "_", ":", "," and "-" in the row labels with newlines.
    labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
    # Spelled out in full; `scale =` only worked through partial argument matching.
    scales = "free_y"
  ) +
  theme(text = element_text(size = 20), strip.text.y = element_text(size = 25, angle = 0.5)) +
  # xlim(view_win) +
  ylab("Estimated effect") +
  # geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
  #           aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  geom_line(
    data = effect_of_interest,
    aes_string(y = "fun_plot", x = "x", col = "cs_coverage_0.95"),
    size = 2
  ) +
  geom_point(
    data = effect_of_interest,
    aes_string(y = "pip", x = "pos", col = "cs_coverage_0.95"),
    size = 10
  ) +
  # This second theme()/ylab() pair repeats settings made above with new values.
  theme(
    text = element_text(size = 40),
    strip.text.y = element_text(size = 15, angle = 0.5),
    axis.text.x = element_text(size = 40),
    axis.title.x = element_text(size = 40)
  ) +
  xlab("Position") +
  ylab("Estimated\neffect") +
  geom_segment(
    arrow = arrow(length = unit(1, "cm")),
    aes(x = gene_start, xend = gene_end, y = 1, yend = 1),
    size = 6,
    data = tar_gene_info$gene_info,
    alpha = 0.3
  ) +
  geom_text(
    aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name),
    size = 10,
    data = tar_gene_info$gene_info
  ) +
  # The faceting columns are removed from this layer's data.
  # NOTE(review): presumably so the red points are drawn in every panel — confirm.
  geom_point(
    aes(x = pos, y = pip),
    color = "red",
    data = flatten_table %>%
      filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
      mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
      select(-study, -region, -cs_coverage_0.95)
  )

# Load the fine-mapping contexts saved in section 1 (read once; the identical
# readRDS call was previously repeated).
finempping_contexts <- readRDS(paste0(gene_name, "_finemapping_contexts.rds")) # from sec1

# Pass the contexts through get_norosmap_contexts() before plotting.
# NOTE(review): presumably this drops the ROSMAP contexts — confirm in the helper.
finempping_contexts <- get_norosmap_contexts(finempping_contexts)

# ColocBoost plot with the remaining contexts added as QTL cohorts.
cb_contexts <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  cohorts = finempping_contexts,
  gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# Interaction q-value of each variant in PRSS36_int_res, on a fixed 0-1 y axis.
# Changes from the earlier version: the title typo ("nalysis") is fixed, the
# x-axis label now matches the plotted column (variant_id, not a gene name),
# and the unused `size` label and the no-op `legend.position = NULL` are gone
# (no aesthetic other than x/y is mapped, so there is no legend to position).
# NOTE(review): PRSS36_int_res is not created in the visible part of this
# notebook — confirm it is loaded before this cell.
int_p <- ggplot(PRSS36_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(
    title = "qvalue for PRSS36 csets in interaction association analysis",
    x = "Variant ID",
    y = "qvalue_interaction"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(color = "grey80"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  ylim(0, 1)
# scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

# Pass the plot explicitly instead of relying on ggplot2's "last plot" state.
ggsave('plots/PRSS36/sec11.interaction_association_PRSS36_lessPIP25.pdf', int_p, height = 5, width = 8)
int_p

# Display vars_p.
# NOTE(review): vars_p is not assigned anywhere in the visible code — confirm where it is created.
vars_p

# Display apoe_p.
# NOTE(review): apoe_p is not assigned anywhere in the visible code — confirm where it is created.
apoe_p

options(repr.plot.width = 12, repr.plot.height = 6)

# PIP per gene in flat_var, with point size mapped to PIP; saved as a PDF and
# displayed. When flat_var is NULL a message is emitted instead.
# Changes from the earlier version: the output directory is built from
# gene_name (the file name already was), and the unused `color` label and the
# no-op `legend.position = NULL` are removed.
# NOTE(review): use legend.position = "none" if the size legend should be hidden.
# NOTE(review): flat_var is not created in the visible part of this notebook —
# confirm it is defined before this cell.
if (!is.null(flat_var)) {
  p <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
    geom_point(alpha = 0.7) +
    labs(
      title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
      x = "Gene Name",
      y = "PIP",
      size = "PIP"
    ) +
    theme_minimal(base_size = 14) +
    theme(
      panel.background = element_blank(),
      panel.grid.major = element_line(color = "grey80"),
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))
  ggsave(
    file.path("plots", gene_name, paste0("sec12.trans_fine_mapping_", gene_name, ".pdf")),
    p,
    height = 5, width = 8
  )
  p
} else {
  message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000178226	chr16	28360000	3.4e+07	31150065	chr16:26796952-29685831,chr16:29685831-46381513	16_26796952-29685831,16_29685831-46381513	16_26796952-29685831,16_29685831_46381513	TADB_1165,TADB_1166	chr16_27341433_34991551,chr16_30975802_46400000	31150066	31138926	chr16:21437007-30613717,chr16:22388493-34991551,chr16:24031743-46400000,chr16:27341433-49746318,chr16:30975802-50968730	PRSS36

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000178226	chr16	28360000	3.4e+07	31150065	chr16:26796952-29685831,chr16:29685831-46381513	16_26796952-29685831,16_29685831-46381513	16_26796952-29685831,16_29685831_46381513	TADB_1165,TADB_1166	chr16_27341433_34991551,chr16_30975802_46400000	31150066	31138926	chr16:21437007-30613717,chr16:22388493-34991551,chr16:24031743-46400000,chr16:27341433-49746318,chr16:30975802-50968730	PRSS36

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
DLPFC; AC; PCC	0.7870407	15	0.4150519	8333; 8335; 8336; 8296; 8283; 8290; 8293; 8291; 8236; 8230; 8295; 8244; 8256; 8330; 8195	chr16:31144137:C:T; chr16:31143037:G:A; chr16:31142825:G:A; chr16:31122892:T:A; chr16:31120929:A:G; chr16:31121341:G:A; chr16:31122128:C:T; chr16:31121779:G:A; chr16:31141376:T:TAAACAAAC; chr16:31103607:G:C; chr16:31116361:TGCCACCACGCCCGGCTAAATTTTGTATTTTTAGTAGAGACGGAGTTTCACGGTGTTAGCCAGGATGGTCTCGATCTCCGCCCGCCTCGGCCTCTCAAAGTGCTGAGATTACAGGCGTGA:*; chr16:31107127:AT:ATT; chr16:31111250:C:T; chr16:31115000:C:A; chr16:31122738:G:A	chr16:31142825:G:A	coloc_sets:Y1_Y2_Y3:MergeCS1

	variants	DLPFC	AC	PCC
	<chr>	<dbl>	<dbl>	<dbl>
chr16:31142825:G:A	chr16:31142825:G:A	-16.26424	-25.06383	-10.375252
chr16:31143037:G:A	chr16:31143037:G:A	16.43975	25.04384	10.143564
chr16:31144137:C:T	chr16:31144137:C:T	16.49991	24.86209	10.143564
chr16:31122892:T:A	chr16:31122892:T:A	15.99842	24.30697	10.484075
chr16:31120929:A:G	chr16:31120929:A:G	15.97668	24.45852	10.415017
chr16:31121341:G:A	chr16:31121341:G:A	15.97668	24.45852	10.415017
chr16:31122128:C:T	chr16:31122128:C:T	15.97668	24.45852	10.415017
chr16:31121779:G:A	chr16:31121779:G:A	15.93449	24.45082	10.415017
chr16:31111250:C:T	chr16:31111250:C:T	15.88256	24.25614	10.399259
chr16:31107127:AT:ATT	chr16:31107127:AT:ATT	15.62171	24.04618	10.479553
chr16:31122738:G:A	chr16:31122738:G:A	15.95428	24.12656	10.200624
chr16:31115000:C:A	chr16:31115000:C:A	15.86255	24.14208	10.200624
chr16:31116361:TGCCACCACGCCCGGCTAAATTTTGTATTTTTAGTAGAGACGGAGTTTCACGGTGTTAGCCAGGATGGTCTCGATCTCCGCCCGCCTCGGCCTCTCAAAGTGCTGAGATTACAGGCGTGA:*	chr16:31116361:TGCCACCACGCCCGGCTAAATTTTGTATTTTTAGTAGAGACGGAGTTTCACGGTGTTAGCCAGGATGGTCTCGATCTCCGCCCGCCTCGGCCTCTCAAAGTGCTGAGATTACAGGCGTGA:*	15.43432	23.82350	9.927840
chr16:31141376:T:TAAACAAAC	chr16:31141376:T:TAAACAAAC	15.90821	23.27277	9.410887
chr16:31103607:G:C	chr16:31103607:G:C	10.92460	19.26562	8.086237

gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000005844	chr16	30472657	30472658	ITGAL	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000052344	chr16	31135726	31135727	PRSS8	ROSMAP_AC_sQTL
ENSG00000077235	chr16	27549912	27549913	GTF3C1	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000079616	chr16	29790718	29790719	KIF22	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000090238	chr16	30096914	30096915	YPEL3	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000099364	chr16	30923054	30923055	FBXL19	ROSMAP_DLPFC_sQTL
ENSG00000099365	chr16	31010637	31010638	STX1B	ROSMAP_AC_sQTL
ENSG00000099381	chr16	30957753	30957754	SETD1A	ROSMAP_DLPFC_sQTL
ENSG00000102870	chr16	30787204	30787205	ZNF629	ROSMAP_AC_sQTL
ENSG00000102879	chr16	30182826	30182827	CORO1A	ROSMAP_PCC_sQTL
ENSG00000102882	chr16	30123505	30123506	MAPK3	ROSMAP_AC_sQTL
ENSG00000103485	chr16	29663278	29663279	QPRT	MSBB_BM36_pQTL,PCC_DeJager_eQTL
ENSG00000103496	chr16	31032888	31032889	STX4	Exc_mega_eQTL
ENSG00000103507	chr16	31106106	31106107	BCKDK	ROSMAP_DLPFC_sQTL,STARNET_eQTL
ENSG00000103510	chr16	31114488	31114489	KAT8	Exc_DeJager_eQTL,Inh_DeJager_eQTL,PCC_DeJager_eQTL,ROSMAP_DLPFC_sQTL,STARNET_eQTL
ENSG00000103549	chr16	30761744	30761745	RNF40	ROSMAP_PCC_sQTL
ENSG00000140678	chr16	31355173	31355174	ITGAX	ROSMAP_AC_sQTL
ENSG00000140682	chr16	31471584	31471585	TGFB1I1	ROSMAP_DLPFC_sQTL
ENSG00000140691	chr16	31458079	31458080	ARMC5	ROSMAP_DLPFC_sQTL
ENSG00000149922	chr16	30091923	30091924	TBX6	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000149926	chr16	30052977	30052978	TLCD3B	ROSMAP_AC_sQTL
ENSG00000149927	chr16	30023269	30023270	DOC2A	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000149930	chr16	29973867	29973868	TAOK2	ROSMAP_DLPFC_sQTL
ENSG00000156853	chr16	30624011	30624012	ZNF689	MiGA_SVZ_eQTL
ENSG00000167397	chr16	31095979	31095980	VKORC1	ROSMAP_PCC_sQTL,STARNET_eQTL
ENSG00000169221	chr16	30370493	30370494	TBC1D10B	MiGA_SVZ_eQTL
ENSG00000169592	chr16	29995714	29995715	INO80E	ROSMAP_PCC_sQTL
ENSG00000169877	chr16	31527899	31527900	AHSP	MiGA_GTS_eQTL
ENSG00000169896	chr16	31259966	31259967	ITGAM	MiGA_THA_eQTL,ROSMAP_AC_sQTL
ENSG00000174938	chr16	29899546	29899547	SEZ6L2	MiGA_GTS_eQTL
ENSG00000174939	chr16	29900374	29900375	ASPHD1	MiGA_SVZ_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000174943	chr16	29926235	29926236	KCTD13	MSBB_BM36_pQTL
ENSG00000176723	chr16	31443159	31443160	ZNF843	MiGA_GTS_eQTL
ENSG00000177238	chr16	31214090	31214091	TRIM72	Knight_eQTL
ENSG00000183336	chr16	29454963	29454964	BOLA2	DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL,STARNET_eQTL
ENSG00000185905	chr16	29745989	29745990	C16orf54	MiGA_GFM_eQTL,MiGA_GTS_eQTL,MiGA_THA_eQTL
ENSG00000198064	chr16	30254509	30254510	NPIPB13	Exc_mega_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000205609	chr16	28403840	28403841	EIF3CL	ROSMAP_DLPFC_sQTL
ENSG00000213658	chr16	28984825	28984826	LAT	DLPFC_DeJager_eQTL
ENSG00000254206	chr16	29404028	29404029	NPIPB11	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000255439	chr16	31094955	31094956	AC135050.2	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,STARNET_eQTL
ENSG00000261740	chr16	29454650	29454651	BOLA2-SMG1P6	DLPFC_DeJager_eQTL,AC_DeJager_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000280893	chr16	29812260	29812261	AC009133.6	ROSMAP_PCC_sQTL
ENSG00000282034	chr16	30704066	30704067	AC106886.5	ROSMAP_DLPFC_sQTL

Case study: PRSS36 xQTL and AD GWAS¶

Overview¶

Computing environment setup¶

How to Use This Notebook¶

Section 0: Sanity check ¶

Check the basic information of the gene¶

Check the existing results which are inputs to this analysis¶

Check the existing results which are inputs to this analysis¶

Section 1: Fine-mapping for xQTL and GWAS ¶

Section 2: Multi-context colocalization with Bellenguez 2022 ¶

Section 3: Refinement of colocalized loci with other AD GWAS ¶

Section 4: Assessment of multi-context xQTL effect sizes ¶

Option 1: ColocBoost + MASH¶

Option 2: mvSuSiE¶

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)¶

TWAS results¶

MR results¶

cTWAS results¶

Section 6: Context specific multi-gene fine-mapping ¶

A quick analysis: using the xQTL-AD summary table (flatten table)¶

A statistically solid approach: mvSuSiE multi-gene analysis¶

Section 7: Epigenomic QTL and their target regions ¶

Generate a crude plot to determine whether the story is interesting¶

Section 8: Context focused validation in other xQTL data ¶

Section 9: Non-linear effects of xQTL ¶

APOE interaction¶

Section 10: in silico functional studies in iPSC model ¶

Section 11: Functional annotations of selected loci ¶

Section 12: Candidate loci as trans-xQTL ¶

Creative thinking: generate hypothesis, search in literature, raise questions to discuss¶