# Auto-print `region_p` so the notebook renders it.
# NOTE(review): `region_p` is not created in the visible part of this file —
# confirm which cell defines it and that it runs first.
region_p

# Auto-print `pip_p` so the notebook renders it.
# NOTE(review): `pip_p` is likewise defined outside the visible code — confirm.
pip_p

# Load the plotting helpers and shared utility functions.
source('/data/interactive_analysis/rf2872/codes/cb_plot.R')
source('/data/interactive_analysis/rf2872/codes/utilis.R')

# Source every R script of the local colocboost checkout.
# `pattern` is a regular expression: the previous ".R" matched any character
# followed by "R" anywhere in the file name, so the dot is escaped and the
# match is anchored to the end of the name.
colocboost_scripts <- list.files(
  "/data/colocalization/colocboost/R",
  pattern = "\\.R$",
  full.names = TRUE
)
for (file in colocboost_scripts) {
  source(file)
}
# Gene symbol used by the cells below.
gene_name <- 'APH1B'

# Per-gene output directory for the saved figures. showWarnings = FALSE keeps
# re-runs quiet when the directory already exists.
dir.create(file.path('plots', gene_name), recursive = TRUE, showWarnings = FALSE)

# Look up the gene record and display it.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Region identifier and chromosome taken from the gene record.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# Re-source the utilities before calling expression_in_rosmap_bulk().
source('/data/interactive_analysis/rf2872/codes/utilis.R')
expression_in_rosmap_bulk(target_gene_info)

# Auto-print `region_p` so the notebook renders it.
# NOTE(review): `region_p` is not created in the visible part of this file —
# confirm which cell defines it and that it runs first.
region_p

# Auto-print `pip_p` so the notebook renders it.
# NOTE(review): `pip_p` is likewise defined outside the visible code — confirm.
pip_p

# Read the stored ColocBoost result for the target region.
cb_res <- readRDS(
  file = paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds")
)

# Summarise the result and write the summary to an RDS file named after the
# gene.
cb_res_table <- get_cb_summary(cb_res)
saveRDS(
  object = cb_res_table,
  file = paste0(gene_name, "_colocboost_res.rds")
)

# ColocBoost plot for the loaded result.
cb <- plot_cb(
  cb_res = cb_res,
  x.phen = -0.2,
  cex.pheno = 1.5
)

options(repr.plot.width = 6, repr.plot.height = 6)

# Scatter plot of the interaction q-value (y) for each variant (x).
# NOTE(review): `APH1B_int_res` is not created in the visible code; it is
# assumed to provide `variant_id` and `qvalue_interaction` columns — confirm.
interaction_plot <- ggplot(APH1B_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  # x is mapped to variant_id, so the axis is labelled accordingly (it used to
  # read "Gene Name"). Point size is a constant, not a mapped aesthetic, so no
  # size label is set.
  labs(title = "qvalue for APH1B csets in interaction association analysis",
       x = "Variant ID",
       y = "qvalue_interaction") +
  theme_minimal(base_size = 14) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey80"),
        # NOTE(review): NULL appears to leave the legend position at its
        # default; use "none" if the legend is meant to be hidden — confirm.
        legend.position = NULL,
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 1)
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

# Display the plot in the notebook.
interaction_plot

# Pass the plot explicitly so the saved figure does not depend on whichever
# plot happens to be the most recent one.
ggsave('plots/APH1B/sec11.interaction_association_APH1B_lessPIP25.pdf',
       plot = interaction_plot, height = 5, width = 8)

# Show the ColocBoost summary table (colocalized variants).
cb_res_table

# Effect sign of each colocalization set.
get_effect_sign_csets(cb_res)

# LD between colocalization sets.
get_between_purity_simple(
  cb_res,
  gene.name = gene_id,
  path = '/data/colocalization/QTL_data/eQTL/'
)

# AD GWAS cohorts to add to the ColocBoost plot.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)

# ColocBoost plot with the GWAS cohorts above added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Replay the recorded plot stored in cb_ad$p into a PDF file.
# dev.off() runs in `finally`, so the PDF device is closed even when
# replayPlot() raises an error, instead of staying open and capturing later
# plots.
pdf('plots/APH1B/sec3.colocboost_res_allad.pdf', width = 10, height = 5)
tryCatch(
  replayPlot(cb_ad$p),
  finally = dev.off()
)

# Call mash_plot() for the gene and print every element of its result.
mash_p <- mash_plot(gene_name = 'APH1B')
for (mash_panel in mash_p) {
  print(mash_panel)
}

# TWAS results for the same gene.
plot_TWAS_res(
  gene_name = gene_name,
  gene_id = gene_id
)

# Flattened multi-gene / multi-context table read from the exported RDS file.
multigene_flat <- get_multigene_multicontext_flatten(
  'Fungen_xQTL_allQTL.overlapped.gwas.export.APH1B.rds',
  sQTL = 'no_MSBB'
)
multigene_flat

# Split the comma-separated `sliding_windows` field of the gene record into a
# character vector with one window per element.
sliding_windows <- as.character(
  unlist(
    strsplit(target_gene_info$gene_info$sliding_windows, ',')
  )
)
sliding_windows

# For every sliding window, try to load the multi-gene mvSuSiE result and, when
# the target gene is among the fitted conditions, draw the mvSuSiE plot.
# Every result that could be read is collected in `mnm_gene`.
# The loop stays best-effort (one failing window does not stop the others), but
# failures are now reported with their reason instead of being dropped silently.
mnm_gene <- list()
for (window in sliding_windows) {
    mnm_path <- paste0('/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.', window, '.mnm.rds')
    mnm_gene_tmp <- tryCatch(
        readRDS(mnm_path),
        error = function(e) {
            message('Could not read mnm result for sliding window ', window, ': ', conditionMessage(e))
            NULL
        }
    )

    # Nothing to plot or collect for this window.
    if (is.null(mnm_gene_tmp)) {
        next
    }

    if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
        tryCatch({
            p <- mvsusieR::mvsusie_plot(mnm_gene_tmp$mvsusie_fitted, sentinel_only = FALSE, add_cs = TRUE)
            print(p)  # This ensures the plot is displayed in JupyterLab
        }, error = function(e) {
            message('Could not plot mnm result for sliding window ', window, ': ', conditionMessage(e))
        })
    } else {
        message('There is mnm result for sliding window ',window,', but not include target gene ', gene_name, ' in CS')
    }

    # The result is kept whether or not the target gene was among its conditions.
    mnm_gene <- append(mnm_gene, list(mnm_gene_tmp))
}

options(repr.plot.width = 40, repr.plot.height = 40)

# Faceted plot (one row per cs_coverage_0.95 / study / region combination)
# showing the effect curve (`fun_plot` over `x`) and the PIPs (`pip` over
# `pos`), with the gene span drawn as an arrow and AD-study PIPs in red.
# NOTE(review): `effect_of_interest`, `flatten_table` and `tar_gene_info` are
# not created in the visible code (other cells use `target_gene_info`) —
# confirm they exist before this cell runs.
#
# Changes from the previous version, none of which alter the figure:
#   * `scales = "free_y"` is spelled out (it was `scale =`, which only worked
#     through partial argument matching);
#   * aes() replaces the deprecated aes_string();
#   * the first theme()/ylab() pair was removed because the later theme()/ylab()
#     calls set the same properties and override them.
ggplot() +
  theme_bw() +
  facet_grid(
    cs_coverage_0.95 + study + region ~ .,
    labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
    scales = "free_y"
  ) +
  # xlim(view_win) +
  # geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
  #           aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  geom_line(
    data = effect_of_interest,
    aes(x = x, y = fun_plot, col = cs_coverage_0.95),
    size = 4
  ) +
  geom_point(
    data = effect_of_interest,
    aes(x = pos, y = pip, col = cs_coverage_0.95),
    size = 4
  ) +
  theme(
    text = element_text(size = 40),
    strip.text.y = element_text(size = 15, angle = 0.5),
    axis.text.x = element_text(size = 40),
    axis.title.x = element_text(size = 40)
  ) +
  xlab("Position") +
  ylab("Estimated\neffect") +
  geom_segment(
    arrow = arrow(length = unit(1, "cm")),
    aes(x = gene_start, xend = gene_end, y = 1, yend = 1),
    size = 6,
    data = tar_gene_info$gene_info,
    alpha = 0.3
  ) +
  geom_text(
    aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name),
    size = 10,
    data = tar_gene_info$gene_info
  ) +
  # The faceting columns are dropped from this layer's data by the select()
  # below.
  geom_point(
    aes(x = pos, y = pip),
    color = "red",
    data = flatten_table %>%
      filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
      mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
      select(-study, -region, -cs_coverage_0.95)
  )

# Read the fine-mapping contexts written in section 1, then pass them through
# get_norosmap_contexts().
finempping_contexts <- readRDS(
  file = paste0(gene_name, '_finemapping_contexts.rds')
) # from sec1
finempping_contexts <- get_norosmap_contexts(finempping_contexts)

# ColocBoost plot with the QTL contexts added; this reassigns `cb_ad`.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  gene_id = gene_id,
  cohorts = finempping_contexts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# Scatter plot of the interaction q-value (y) for each variant (x).
# NOTE(review): `APH1B_int_res` is not created in the visible code; it is
# assumed to provide `variant_id` and `qvalue_interaction` columns — confirm.
interaction_plot <- ggplot(APH1B_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  # x is mapped to variant_id, so the axis is labelled accordingly (it used to
  # read "Gene Name"). Point size is a constant, not a mapped aesthetic, so no
  # size label is set.
  labs(title = "qvalue for APH1B csets in interaction association analysis",
       x = "Variant ID",
       y = "qvalue_interaction") +
  theme_minimal(base_size = 14) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey80"),
        # NOTE(review): NULL appears to leave the legend position at its
        # default; use "none" if the legend is meant to be hidden — confirm.
        legend.position = NULL,
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 1)
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

# Display the plot in the notebook.
interaction_plot

# Pass the plot explicitly so the saved figure does not depend on whichever
# plot happens to be the most recent one.
ggsave('plots/APH1B/sec11.interaction_association_APH1B_lessPIP25.pdf',
       plot = interaction_plot, height = 5, width = 8)

# Auto-print three objects so the notebook renders them.
# NOTE(review): `vars_p`, `apoe_p` and `func_p` are not created in the visible
# part of this file — confirm which cells define them and that they run first.
vars_p

apoe_p

func_p

options(repr.plot.width = 12, repr.plot.height = 6)

# PIP per gene for the trans fine-mapping table `flat_var`.
# NOTE(review): `flat_var` is not created in the visible code — confirm where
# it is built.
# NROW() is 0 for NULL as well as for a table without rows, so both cases take
# the "no detectable signal" branch instead of producing an empty figure.
if (NROW(flat_var) > 0) {
    trans_plot <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
      geom_point(alpha = 0.7) +
      # Only x, y and size are mapped, so only those get labels (the former
      # `color` label referred to an aesthetic that is not mapped).
      labs(title = paste0("PIP values for trans fine mapped Genes in ", gene_name ," csets with AD"),
           x = "Gene Name",
           y = "PIP",
           size = "PIP") +
      theme_minimal(base_size = 14) +
      theme(panel.background = element_blank(),
            panel.grid.major = element_line(color = "grey80"),
            # NOTE(review): NULL appears to leave the legend position at its
            # default; use "none" if the legend is meant to be hidden — confirm.
            legend.position = NULL,
            axis.text.x = element_text(angle = 45, hjust = 1))
      # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

    # A plot built inside `if` is not auto-printed, so print it explicitly;
    # the repr.plot options set above only matter when it is displayed.
    print(trans_plot)

    # The directory is derived from gene_name, like the file name, so the
    # figure lands in the folder created for this gene.
    ggsave(file.path('plots', gene_name, paste0('sec12.trans_fine_mapping_', gene_name, '.pdf')),
           plot = trans_plot, height = 5, width = 8)
} else {
    message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000138613	chr15	62276017	65480000	63276017	chr15:61125463-63051119,chr15:63051119-66680537	15_61125463-63051119,15_63051119-66680537	15_61125463-63051119,15_63051119_66680537	TADB_1132,TADB_1133,TADB_1134	chr15_58574103_63343138,chr15_60834681_64158021,chr15_61390525_66517704	63276018	63309126	chr15:54171378-63343138,chr15:56375966-64158021,chr15:58574103-66517704,chr15:60834681-67685794,chr15:61390525-69257131,chr15:64234460-70062762,chr15:65293216-73640125	APH1B

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
Ast; Oli; Exc; DLPFC; AC; PCC; Monocyte; AD_Bellenguez_2022	1	2	0.8086306	4999; 5009	chr15:63277703:C:T; chr15:63279621:C:T	chr15:63279621:C:T	coloc_sets:Y2_Y3_Y5_Y7_Y8_Y9_Y10_Y16:CS1

	variants	Ast	Oli	Exc	DLPFC	AC	PCC	Monocyte	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
chr15:63277703:C:T	chr15:63277703:C:T	4.548767	5.860438	6.84169	7.817647	6.837688	4.895018	5.048955	9.495798
chr15:63279621:C:T	chr15:63279621:C:T	4.548767	5.860438	6.84169	7.833740	6.594349	4.649904	5.048955	9.100000

gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000035664	chr15	64072032	64072033	DAPK2	BM_36_MSBB_eQTL,BM_44_MSBB_eQTL
ENSG00000074410	chr15	63381845	63381846	CA12	Knight_eQTL,BM_10_MSBB_eQTL,BM_44_MSBB_eQTL,Ast_DeJager_eQTL,DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,Ast_Kellis_eQTL,Ast_mega_eQTL,ROSMAP_PCC_sQTL,STARNET_eQTL
ENSG00000074621	chr15	65611377	65611378	SLC24A1	MiGA_SVZ_eQTL
ENSG00000090470	chr15	65133807	65133808	PDCD7	MiGA_SVZ_eQTL
ENSG00000103642	chr15	63121832	63121833	LACTB	MiGA_SVZ_eQTL,Exc_DeJager_eQTL
ENSG00000103657	chr15	63833947	63833948	HERC1	MiGA_THA_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000103710	chr15	65076689	65076690	RASL12	BM_22_MSBB_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000103742	chr15	65422946	65422947	IGDCC4	BM_22_MSBB_eQTL,Ast_DeJager_eQTL,Inh_DeJager_eQTL,DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL,Inh_Kellis_eQTL,Ast_mega_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000129003	chr15	62060472	62060473	VPS13C	ROSMAP_PCC_sQTL
ENSG00000138614	chr15	65611288	65611289	INTS14	MiGA_THA_eQTL
ENSG00000140416	chr15	63042631	63042632	TPM1	ROSMAP_DLPFC_sQTL
ENSG00000140455	chr15	63504510	63504511	USP3	Mic_DeJager_eQTL,Mic_mega_eQTL,OPC_mega_eQTL
ENSG00000166128	chr15	63189559	63189560	RAB8B	OPC_Kellis_eQTL,Ast_mega_eQTL,DLPFC_Bennett_pQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL,STARNET_eQTL
ENSG00000166794	chr15	64163133	64163134	PPIB	MiGA_GTS_eQTL,Inh_Kellis_eQTL
ENSG00000166803	chr15	64387686	64387687	PCLAF	MiGA_GTS_eQTL
ENSG00000166839	chr15	64911901	64911902	ANKDD1A	Ast_Kellis_eQTL
ENSG00000169118	chr15	64356172	64356173	CSNK1G1	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000171914	chr15	62390525	62390526	TLN2	MiGA_GTS_eQTL,AC_DeJager_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000174446	chr15	66497779	66497780	SNAPC5	MiGA_SVZ_eQTL
ENSG00000174498	chr15	65378001	65378002	IGDCC3	MiGA_GTS_eQTL
ENSG00000180304	chr15	64703280	64703281	OAZ2	MiGA_SVZ_eQTL
ENSG00000180357	chr15	64460741	64460742	ZNF609	ROSMAP_PCC_sQTL
ENSG00000185088	chr15	63158020	63158021	RPS27L	MiGA_THA_eQTL,ROSMAP_PCC_sQTL
ENSG00000186198	chr15	65045386	65045387	SLC51B	BM_44_MSBB_eQTL
ENSG00000205502	chr15	62165284	62165285	C2CD4B	MiGA_THA_eQTL
ENSG00000241839	chr15	64841882	64841883	PLEKHO2	MiGA_SVZ_eQTL
ENSG00000246922	chr15	65115199	65115200	UBAP1L	Inh_Kellis_eQTL
ENSG00000249240	chr15	64841947	64841948	AC069368.1	ROSMAP_AC_sQTL
ENSG00000259316	chr15	64381439	64381440	AC087632.2	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL

Case study: APH1B xQTL and AD GWAS¶

Overview¶

Computing environment setup¶

Section 0: Sanity check ¶

Check the basic information of the gene¶

Check the existing results which are inputs to this analysis¶

Section 1: Fine-mapping for xQTL and GWAS ¶

Section 2: Multi-context colocalization with Bellenguez 2022 ¶

Section 3: Refinement of colocalized loci with other AD GWAS ¶

Section 4: Assessment of multi-context xQTL effect sizes ¶

Option 1: ColocBoost + MASH¶

Option 2: mvSuSiE¶

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)¶

TWAS results¶

MR results¶

cTWAS results¶

Section 6: Context specific multi-gene fine-mapping ¶

A quick analysis: using the xQTL-AD summary table (flatten table)¶

A statistically solid approach: mvSuSiE multi-gene analysis¶

Section 7: Epigenomic QTL and their target regions ¶

Generate a crude plot to determine whether the story is interesting¶

Section 8: Context focused validation in other xQTL data ¶

Section 9: Non-linear effects of xQTL ¶

APOE interaction¶

Section 10: in silico functional studies in iPSC model ¶

Section 11: Functional annotations of selected loci ¶

Section 12: Candidate loci as trans-xQTL ¶

Creative thinking: generate hypothesis, search in literature, raise questions to discuss¶