micromamba install -n r_libs r-pecotmr

# If an error occurs while sourcing scripts, it might be because your get() returned NULL.
# Please restart the kernel or click the R kernel in the upper right corner to resolve the issue.
source('../../codes/cb_plot.R')
source('../../codes/utilis.R')

# Source every R script of the ColocBoost checkout.
# `pattern` is a regular expression: the previous ".R" matched any character
# followed by "R" anywhere in the file name, so files that are not R scripts
# could be picked up. "\\.R$" only keeps names ending in ".R".
colocboost_scripts <- list.files(
  "/data/colocalization/colocboost/R",
  pattern = "\\.R$",
  full.names = TRUE
)
for (script in colocboost_scripts) {
  source(script)
}
# Gene analysed in this notebook.
gene_name <- 'KNOP1'

# Directory that receives the figures saved below. It is only created when
# missing, so re-running the notebook does not raise an "already exists"
# warning while genuine creation failures are still reported.
plot_dir <- file.path('plots', gene_name)
if (!dir.exists(plot_dir)) {
  dir.create(plot_dir, recursive = TRUE)
}

# Look up the gene record once and display it (the same lookup used to be
# issued twice in a row with identical arguments).
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Region identifier and chromosome taken from the gene record.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# Source the utility script again (it is also sourced at the top of the notebook).
source('../../codes/utilis.R')
# Helper called with the gene record; presumably shows the gene's expression
# in ROSMAP bulk data - confirm in utilis.R.
expression_in_rosmap_bulk(target_gene_info)

# NOTE(review): region_p is not assigned anywhere in the visible code;
# presumably a plot object produced elsewhere - confirm where it is created.
region_p

# NOTE(review): pip_p is not assigned in the visible code either - confirm its origin.
pip_p

# Load the ColocBoost result of the target gene (the file used to be read
# twice in a row; one read is enough).
cb_res <- readRDS(paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds"))

cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Save the recorded plot in the directory of the current gene instead of a
# hard-coded 'plots/KNOP1', so the output follows gene_name.
pdf(file.path('plots', gene_name, 'sec2.colocboost_res.pdf'), width = 10, height = 5)
# Close the device even when replaying fails; otherwise the PDF device stays
# open and later plots would be drawn into it.
invisible(tryCatch(replayPlot(cb$p), finally = dev.off()))

# Colocalized variants.
# NOTE(review): cb_res_table is not assigned anywhere in the visible code;
# presumably created by plot_cb() or a sourced script - confirm.
cb_res_table

# Effect sign for each coloc set.
get_effect_sign_csets(cb_res)

# LD between coloc sets. The region id is passed as gene.name and `path`
# points at the eQTL data directory.
get_between_purity_simple(cb_res, gene.name = gene_id, path = '/data/colocalization/QTL_data/eQTL/')

# AD GWAS studies passed to plot_cb() as `cohorts`.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)

# Same ColocBoost plot call as before, with add_gwas = TRUE and the cohort list.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Save the AD-refined ColocBoost plot in the directory of the current gene
# instead of a hard-coded 'plots/KNOP1'.
pdf(file.path('plots', gene_name, 'sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
# Close the device even when replaying fails; otherwise the PDF device stays
# open and later plots would be drawn into it.
invisible(tryCatch(replayPlot(cb_ad$p), finally = dev.off()))

# Build the MASH plots for the gene selected at the top of the notebook
# (gene_name replaces a hard-coded 'KNOP1').
mash_p <- mash_plot(gene_name = gene_name)

options(repr.plot.width = 10, repr.plot.height = 10)

# Objects are not auto-printed inside a loop, so each plot is printed explicitly.
for (mash_p_tmp in mash_p) {
    print(mash_p_tmp)
}

plot_TWAS_res(gene_id = gene_id, gene_name = gene_name)

# The export file name is built from gene_name instead of a hard-coded
# 'KNOP1', so it stays in sync with the gene chosen at the top of the notebook.
multigene_flat <- get_multigene_multicontext_flatten(
  paste0('Fungen_xQTL_allQTL.overlapped.gwas.export.', gene_name, '.rds'),
  sQTL = 'no_MSBB'
)
multigene_flat

# Split the comma-separated sliding-window string of the gene record into a
# character vector with one window per element.
sliding_windows <- as.character(
  unlist(strsplit(target_gene_info$gene_info$sliding_windows, ',', fixed = TRUE))
)
sliding_windows

# For every sliding window of the target gene, load the multi-gene result file
# if there is one, plot it when the target gene is among the fitted condition
# names, and collect every loaded result in `mnm_gene`.
mnm_gene <- list()
for (window in sliding_windows) {
    mnm_path <- paste0('/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.', window, '.mnm.rds')

    # Windows without a result file are skipped quietly, as before.
    if (!file.exists(mnm_path)) {
        next
    }

    # A file that exists but cannot be read is reported instead of being
    # dropped silently; the window is still skipped.
    mnm_gene_tmp <- tryCatch(
        readRDS(mnm_path),
        error = function(e) {
            warning('Failed to read ', mnm_path, ': ', conditionMessage(e), call. = FALSE)
            NULL
        }
    )
    if (is.null(mnm_gene_tmp)) {
        next
    }

    if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
        tryCatch({
            p <- mvsusieR::mvsusie_plot(mnm_gene_tmp$mvsusie_fitted, sentinel_only = FALSE, add_cs = TRUE)
            print(p)  # This ensures the plot is displayed in JupyterLab
        }, error = function(e) {
            # Plotting stays best-effort, but the reason for a failure is surfaced.
            warning('Could not plot mvSuSiE result for window ', window, ': ', conditionMessage(e), call. = FALSE)
        })
    } else {
        message('There is mnm result for sliding window ', window, ', but not include target gene ', gene_name, ' in CS')
    }

    # The result is kept whether or not it was plotted.
    mnm_gene[[length(mnm_gene) + 1L]] <- mnm_gene_tmp
}

options(repr.plot.width = 40, repr.plot.height = 40)

# Crude overview plot: effect curves and PIPs from `effect_of_interest`,
# faceted by credible set, study and region, with the gene span drawn at y = 1
# and AD hits from `flatten_table` overlaid in red.
#
# Changes against the earlier version of this cell:
#   * aes_string() is deprecated in ggplot2; the same mappings use aes().
#   * `scale = "free_y"` relied on partial argument matching; the argument is
#     spelled `scales`.
#   * a first theme()/ylab() pair was overridden by the later one in the same
#     chain and has been dropped.
#
# NOTE(review): effect_of_interest, flatten_table and tar_gene_info are not
# defined in the visible code. The gene record is assigned above as
# `target_gene_info` - confirm whether `tar_gene_info` is intended here.
ggplot() +
  theme_bw() +
  facet_grid(
    cs_coverage_0.95 + study + region ~ .,
    labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
    scales = "free_y"
  ) +
  # xlim(view_win) +
  # geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
  #           aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  geom_line(
    data = effect_of_interest,
    aes(y = fun_plot, x = x, col = cs_coverage_0.95),
    size = 2
  ) +
  geom_point(
    data = effect_of_interest,
    aes(y = pip, x = pos, col = cs_coverage_0.95),
    size = 10
  ) +
  theme(
    text = element_text(size = 40),
    strip.text.y = element_text(size = 15, angle = 0.5),
    axis.text.x = element_text(size = 40),
    axis.title.x = element_text(size = 40)
  ) +
  xlab("Position") +
  ylab("Estimated\neffect") +
  geom_segment(
    arrow = arrow(length = unit(1, "cm")),
    aes(x = gene_start, xend = gene_end, y = 1, yend = 1),
    size = 6,
    data = tar_gene_info$gene_info,
    alpha = 0.3
  ) +
  geom_text(
    aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name),
    size = 10,
    data = tar_gene_info$gene_info
  ) +
  # The faceting columns are removed from this layer's data; presumably so the
  # AD points are repeated in every panel - confirm.
  geom_point(
    aes(x = pos, y = pip),
    color = "red",
    data = flatten_table %>%
      filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
      mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
      select(-study, -region, -cs_coverage_0.95)
  )

# Load the fine-mapping contexts saved by section 1 (the file used to be read
# twice in a row; one read is enough).
finempping_contexts <- readRDS(paste0(gene_name, '_finemapping_contexts.rds')) # from sec1

# Filter the contexts through get_norosmap_contexts() before plotting.
finempping_contexts <- get_norosmap_contexts(finempping_contexts)

cb_contexts <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  cohorts = finempping_contexts,
  gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# Scatter plot of the interaction q-value of each variant in KNOP1_int_res.
# NOTE(review): KNOP1_int_res is not defined in the visible code - confirm
# where it is created.
#
# Fixes against the earlier version of this cell:
#   * the x axis shows variant_id but was labelled "Gene Name";
#   * the title read "nalysis";
#   * the plot is stored and passed to ggsave() explicitly instead of relying
#     on the last displayed plot.
int_p <- ggplot(KNOP1_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(title = "qvalue for KNOP1 csets in interaction association analysis",
       x = "Variant ID",
       y = "qvalue_interaction",
       size = "qvalue_interaction") +
  theme_minimal(base_size = 14) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey80"),
        legend.position = NULL,
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 1)
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))
int_p
ggsave('plots/KNOP1/sec11.interaction_association_KNOP1_lessPIP25.pdf', plot = int_p, height = 5, width = 8)

# NOTE(review): vars_p is not assigned anywhere in the visible code;
# presumably a plot object produced elsewhere - confirm where it is created.
vars_p

# NOTE(review): apoe_p is not assigned in the visible code either; presumably
# the APOE interaction plot - confirm its origin.
apoe_p

options(repr.plot.width = 12, repr.plot.height = 6)

# Plot the PIP of each entry in flat_var by gene, or report that there is
# nothing to plot when flat_var is NULL.
# NOTE(review): flat_var is not defined in the visible code - confirm where it
# is created.
if (!is.null(flat_var)) {
  p <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
    geom_point(alpha = 0.7) +
    labs(title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
         x = "Gene Name",
         y = "PIP",
         size = "PIP",
         color = "CS Coverage 0.95 Min Corr") +
    theme_minimal(base_size = 14) +
    theme(panel.background = element_blank(),
          panel.grid.major = element_line(color = "grey80"),
          # NOTE(review): NULL presumably leaves the legend position at its
          # default; use "none" if the legend is meant to be hidden.
          legend.position = NULL,
          axis.text.x = element_text(angle = 45, hjust = 1))
    # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

  # The directory is derived from gene_name as well; before, the path mixed a
  # hard-coded 'plots/KNOP1/' with a gene_name-based file name.
  ggsave(
    file.path('plots', gene_name, paste0('sec12.trans_fine_mapping_', gene_name, '.pdf')),
    plot = p,
    height = 5,
    width = 8
  )
  # Last value of the branch, so the notebook displays the plot.
  p
} else {
  message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000103550	chr16	18160000	20718227	19718226	chr16:15916446-19268913,chr16:19268913-20139051,chr16:20139051-21586331	16_15916446-19268913,16_19268913-20139051,16_20139051-21586331	16_15916446-19268913,16_19268913_20139051,16_20139051_21586331	TADB_1159,TADB_1160	chr16_17002806_20886167,chr16_19028239_22550373	19718227	19701937	chr16:9840384-19598328,chr16:13672548-20886167,chr16:15149565-22550373,chr16:17002806-23576865,chr16:19028239-24596316,chr16:20376647-26257845	KNOP1

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000103550	chr16	18160000	20718227	19718226	chr16:15916446-19268913,chr16:19268913-20139051,chr16:20139051-21586331	16_15916446-19268913,16_19268913-20139051,16_20139051-21586331	16_15916446-19268913,16_19268913_20139051,16_20139051_21586331	TADB_1159,TADB_1160	chr16_17002806_20886167,chr16_19028239_22550373	19718227	19701937	chr16:9840384-19598328,chr16:13672548-20886167,chr16:15149565-22550373,chr16:17002806-23576865,chr16:19028239-24596316,chr16:20376647-26257845	KNOP1

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
Ast; Oli; OPC; Exc; Inh; DLPFC; AC; PCC; AD_Bellenguez_2022	0.9960112	8	0.7316266	6373; 6359; 6368; 6366; 6377; 6363; 6367; 6387	chr16:19711044:T:C; chr16:19708463:CTCA:C; chr16:19710484:C:T; chr16:19710409:A:G; chr16:19712698:A:G; chr16:19708936:A:G; chr16:19710433:C:T; chr16:19716676:A:G	chr16:19711044:T:C	coloc_sets:Y1_Y2_Y3_Y4_Y5_Y6_Y7_Y8_Y10:CS1

	variants	Ast	Oli	OPC	Exc	Inh	DLPFC	AC	PCC	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
chr16:19711044:T:C	chr16:19711044:T:C	29.97969	32.60936	16.39144	26.44051	20.54724	33.88967	21.90834	18.08904	-4.212963
chr16:19708463:CTCA:C	chr16:19708463:CTCA:C	29.98283	32.84780	16.49494	26.29611	20.55352	33.65862	21.85248	18.05163	-4.072072
chr16:19710484:C:T	chr16:19710484:C:T	29.54119	32.52665	16.39144	26.16733	20.44829	33.84469	21.82629	18.02935	-4.250000
chr16:19710409:A:G	chr16:19710409:A:G	29.54119	32.52665	16.39144	26.16733	20.44829	33.84469	21.82629	18.02935	-4.240741
chr16:19712698:A:G	chr16:19712698:A:G	29.54119	32.52665	16.39144	26.16733	20.44829	33.84469	21.82629	18.02935	-4.212963
chr16:19708936:A:G	chr16:19708936:A:G	29.54119	32.52665	16.39144	26.16733	20.44829	33.84469	21.82629	18.02935	-4.203704
chr16:19710433:C:T	chr16:19710433:C:T	29.54119	32.52665	16.39144	26.16733	20.44829	33.84469	21.82629	18.02935	-4.183491
chr16:19716676:A:G	chr16:19716676:A:G	29.54119	32.52665	16.39144	26.16733	20.44829	33.84469	21.82629	18.02935	-4.186916

gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000006007	chr16	19522122	19522123	GDE1	MiGA_SVZ_eQTL,DLPFC_Bennett_pQTL
ENSG00000066654	chr16	20742083	20742084	THUMPD1	MiGA_SVZ_eQTL,BM_10_MSBB_eQTL,Oli_Kellis_eQTL,DLPFC_Bennett_pQTL
ENSG00000102897	chr16	20899867	20899868	LYRM1	ROSMAP_DLPFC_sQTL
ENSG00000103226	chr16	16232505	16232506	NOMO3	Exc_DeJager_eQTL,Inh_DeJager_eQTL,Exc_mega_eQTL,ROSMAP_PCC_sQTL
ENSG00000103528	chr16	19167970	19167971	SYT17	MiGA_SVZ_eQTL,MiGA_THA_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL,STARNET_eQTL
ENSG00000103534	chr16	19410495	19410496	TMC5	Knight_eQTL,BM_44_MSBB_eQTL,DLPFC_DeJager_eQTL
ENSG00000103540	chr16	19523810	19523811	CCP110	STARNET_eQTL
ENSG00000103544	chr16	19555239	19555240	VPS35L	MiGA_SVZ_eQTL,OPC_Kellis_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000140749	chr16	21652607	21652608	IGSF6	ROSMAP_PCC_sQTL
ENSG00000157106	chr16	18926407	18926408	SMG1	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000158486	chr16	21159440	21159441	DNAH3	MiGA_SVZ_eQTL
ENSG00000167186	chr16	19067613	19067614	COQ7	MiGA_GTS_eQTL,MSBB_BM36_pQTL,Exc_DeJager_eQTL,DLPFC_DeJager_eQTL,Exc_Kellis_eQTL,Exc_mega_eQTL,STARNET_eQTL
ENSG00000167191	chr16	19886166	19886167	GPRC5B	BM_10_MSBB_eQTL,BM_22_MSBB_eQTL,MSBB_BM36_pQTL
ENSG00000169344	chr16	20356300	20356301	UMOD	MiGA_SVZ_eQTL
ENSG00000170537	chr16	18983933	18983934	TMC7	Inh_mega_eQTL
ENSG00000170540	chr16	18801552	18801553	ARL6IP1	MiGA_GFM_eQTL
ENSG00000183549	chr16	20409533	20409534	ACSM5	BM_44_MSBB_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000183889	chr16	16317443	16317444	AC138969.1	BM_22_MSBB_eQTL,BM_36_MSBB_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL
ENSG00000185164	chr16	18562210	18562211	NOMO2	MiGA_THA_eQTL,BM_44_MSBB_eQTL,Oli_DeJager_eQTL,Inh_DeJager_eQTL,PCC_DeJager_eQTL,Oli_Kellis_eQTL,Exc_mega_eQTL,Inh_mega_eQTL,Oli_mega_eQTL,ROSMAP_AC_sQTL
ENSG00000196678	chr16	20900348	20900349	ERI2	MiGA_SVZ_eQTL
ENSG00000205730	chr16	19113931	19113932	ITPRIPL2	MiGA_GFM_eQTL,MiGA_GTS_eQTL,STARNET_eQTL
ENSG00000214940	chr16	18336735	18336736	NPIPA8	DLPFC_DeJager_eQTL,Oli_Kellis_eQTL,Exc_Kellis_eQTL,Inh_Kellis_eQTL
ENSG00000233024	chr16	18379330	18379331	NPIPA9	BM_10_MSBB_eQTL,DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000261210	chr16	19285730	19285731	CLEC19A	MiGA_GFM_eQTL,BM_36_MSBB_eQTL,STARNET_eQTL

Case study: KNOP1 xQTL and AD GWAS

Overview

Computing environment setup

How to Use This Notebook

Section 0: Sanity check

Check the basic information of the gene

Check the existing results which are inputs to this analysis

Check the existing results which are inputs to this analysis

Section 1: Fine-mapping for xQTL and GWAS

Section 2: Multi-context colocalization with Bellenguez 2022

Section 3: Refinement of colocalized loci with other AD GWAS

Section 4: Assessment of multi-context xQTL effect sizes

Option 1: ColocBoost + MASH

Option 2: mvSuSiE

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)

TWAS results

MR results

cTWAS results

Section 6: Context specific multi-gene fine-mapping

A quick analysis: using the xQTL-AD summary table (flatten table)

A statistically solid approach: mvSuSiE multi-gene analysis

Section 7: Epigenomic QTL and their target regions

Generate a crude plot to determined whether the story is interesting

Section 8: Context focused validation in other xQTL data

Section 9: Non-linear effects of xQTL

APOE interaction

Section 10: in silico functional studies in iPSC model

Section 11: Functional annotations of selected loci

Section 12: Candidate loci as trans-xQTL

Creative thinking: generate hypothesis, search in literature, raise questions to discuss