# Environment setup (shell commands, not R): install the r-pecotmr and
# r-bedmatrix packages into the micromamba environment named r_libs.
micromamba install -n r_libs r-pecotmr
micromamba install -n r_libs r-bedmatrix

# Load the plotting and utility helper scripts used by this notebook.
source("/data/interactive_analysis/rf2872/codes/cb_plot.R")
source("/data/interactive_analysis/rf2872/codes/utilis.R")

# Source every R script found in the colocboost R/ directory.
# `pattern` is a regular expression: the previous value ".R" matched any
# character followed by "R" anywhere in the file name (README.md, *.Rmd,
# *.RData, ...), so it is anchored to the ".R" extension here.
colocboost_scripts <- list.files(
  "/data/colocalization/colocboost/R",
  pattern = "\\.R$",
  full.names = TRUE
)
for (file in colocboost_scripts) {
  source(file)
}
# Gene symbol analysed in this notebook.
gene_name <- "CLU"

# Directory that receives the figures for this gene. Only create it when it is
# missing so that re-running the cell does not raise an "already exists"
# warning, while genuine creation failures are still reported.
plot_dir <- file.path("plots", gene_name)
if (!dir.exists(plot_dir)) {
  dir.create(plot_dir, recursive = TRUE)
}

# Fetch the annotation record for the gene and display it.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Extract the region identifier and the chromosome from the gene_info table.
gene_id <- target_gene_info$gene_info[["region_id"]]
chrom <- target_gene_info$gene_info[["#chr"]]

# Source the utility helpers again (presumably to pick up edits made since the
# first load -- confirm), then call the ROSMAP bulk expression helper on the
# target gene record.
utilis_script <- "/data/interactive_analysis/rf2872/codes/utilis.R"
source(utilis_script)
expression_in_rosmap_bulk(target_gene_info)

# Display two objects that are created outside the code visible here.
# NOTE(review): by name these look like a region-level plot and a PIP plot --
# confirm where they are built before relying on this cell.
region_p

pip_p

# Read the stored ColocBoost result for this gene.
cb_res_path <- paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds")
cb_res <- readRDS(cb_res_path)

# Summarise the result into a table and save it in the working directory.
cb_res_table <- get_cb_summary(cb_res)
cb_table_path <- paste0(gene_name, "_colocboost_res.rds")
saveRDS(cb_res_table, cb_table_path)

# Build the ColocBoost figure; the recorded plot is stored in cb$p.
cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Replay the recorded plot into a PDF inside the per-gene plot directory.
# The directory is derived from gene_name (matching the dir.create() call
# earlier in the notebook) instead of a hard-coded "plots/CLU".
pdf(file.path("plots", gene_name, "sec2.colocboost_res.pdf"), width = 10, height = 5)
# `finally` closes the PDF device even when replaying fails, so a failed run
# does not leave a dangling graphics device behind.
invisible(tryCatch(
  replayPlot(cb$p),
  finally = dev.off()
))

# Show the table of colocalized variants.
cb_res_table

# Effect sign of the variants in each colocalization set.
get_effect_sign_csets(cb_res)

# LD between colocalization sets, using the eQTL data directory.
eqtl_data_dir <- "/data/colocalization/QTL_data/eQTL/"
get_between_purity_simple(
  cb_res,
  gene.name = gene_id,
  path = eqtl_data_dir
)

# AD GWAS cohorts to overlay on the ColocBoost figure.
AD_cohorts <- c(
  "AD_Jansen_2021",
  "AD_Bellenguez_EADB_2022",
  "AD_Bellenguez_EADI_2022",
  "AD_Kunkle_Stage1_2019",
  "AD_Wightman_Excluding23andMe_2021",
  "AD_Wightman_ExcludingUKBand23andME_2021",
  "AD_Wightman_Full_2021"
)

# Same figure as before, with the GWAS cohorts added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Replay the recorded AD-cohort figure (cb_ad$p) into a PDF inside the
# per-gene plot directory. The directory is derived from gene_name (matching
# the dir.create() call earlier in the notebook) instead of a hard-coded
# "plots/CLU".
pdf(file.path("plots", gene_name, "sec3.colocboost_res_allad.pdf"), width = 10, height = 5)
# `finally` closes the PDF device even when replaying fails, so a failed run
# does not leave a dangling graphics device behind.
invisible(tryCatch(
  replayPlot(cb_ad$p),
  finally = dev.off()
))

# TWAS results for the target gene.
plot_TWAS_res(gene_id = gene_id)

# Flattened multi-gene / multi-context table read from the per-gene export
# file. The file name is built from gene_name rather than a hard-coded gene
# symbol, so the cell follows the gene selected at the top of the notebook.
multigene_flat <- get_multigene_multicontext_flatten(
  paste0("Fungen_xQTL_allQTL.overlapped.gwas.export.", gene_name, ".rds"),
  sQTL = "no_MSBB"
)
multigene_flat

# Split the comma-separated sliding_windows field of the gene record into a
# character vector with one window per element, then display it.
sliding_windows <- as.character(
  unlist(strsplit(target_gene_info$gene_info$sliding_windows, ","))
)
sliding_windows

# Collect the multi-gene mvSuSiE results for every sliding window of the
# target gene and plot those whose fitted model lists the target gene among
# its condition names.
#
# mnm_gene ends up as a list with one element per window whose result file
# could be read.
mnm_gene <- list()
for (window in sliding_windows) {
  mnm_gene_tmp <- NULL
  mnm_path <- paste0(
    "/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.",
    window, ".mnm.rds"
  )

  # Windows without a result file are skipped quietly. Checking up front
  # avoids the "cannot open file" warning that readRDS() raises in addition
  # to its error.
  if (!file.exists(mnm_path)) {
    next
  }

  # A file that exists but cannot be read is reported instead of being
  # silently ignored; the loop still continues with the next window.
  mnm_gene_tmp <- tryCatch(
    readRDS(mnm_path),
    error = function(e) {
      message("Could not read ", mnm_path, ": ", conditionMessage(e))
      NULL
    }
  )
  if (is.null(mnm_gene_tmp)) {
    next
  }

  if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
    tryCatch(
      {
        p <- mvsusieR::mvsusie_plot(
          mnm_gene_tmp$mvsusie_fitted,
          sentinel_only = FALSE,
          add_cs = TRUE
        )
        print(p) # This ensures the plot is displayed in JupyterLab
      },
      error = function(e) {
        # Plotting stays best-effort, but the failure is now visible.
        message(
          "mvsusie_plot failed for sliding window ", window, ": ",
          conditionMessage(e)
        )
      }
    )
  } else {
    message(
      "There is an mnm result for sliding window ", window,
      ", but it does not include target gene ", gene_name,
      " in its condition names"
    )
  }

  mnm_gene[[length(mnm_gene) + 1L]] <- mnm_gene_tmp
}

# Use a large canvas: the figure stacks one facet row per
# cs_coverage_0.95 / study / region combination.
options(repr.plot.width = 40, repr.plot.height = 40)

# Rows of flatten_table whose study name contains "AD_" and whose
# cs_coverage_0.95 is non-zero. The faceting columns are dropped, presumably
# so that this layer is not restricted to a single facet row -- confirm.
ad_cs_variants <- flatten_table %>%
  filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
  mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
  select(-study, -region, -cs_coverage_0.95)

# Gene coordinates come from target_gene_info, the gene record loaded at the
# top of the notebook (this cell previously referred to `tar_gene_info`, a
# name that is not defined anywhere in the visible notebook).
gene_track <- target_gene_info$gene_info

ggplot() +
  theme_bw() +
  # `scales` spelled out in full instead of relying on partial matching.
  facet_grid(
    cs_coverage_0.95 + study + region ~ .,
    labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
    scales = "free_y"
  ) +
  # xlim(view_win) +
  # geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
  #           aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  # aes() replaces the deprecated aes_string(); the mapped columns are the same.
  geom_line(
    data = effect_of_interest,
    aes(x = x, y = fun_plot, col = cs_coverage_0.95),
    size = 2
  ) +
  geom_point(
    data = effect_of_interest,
    aes(x = pos, y = pip, col = cs_coverage_0.95),
    size = 10
  ) +
  # Arrow from gene_start to gene_end at y = 1, with the gene name at its
  # midpoint.
  geom_segment(
    data = gene_track,
    aes(x = gene_start, xend = gene_end, y = 1, yend = 1),
    arrow = arrow(length = unit(1, "cm")),
    size = 6,
    alpha = 0.3
  ) +
  geom_text(
    data = gene_track,
    aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name),
    size = 10
  ) +
  geom_point(data = ad_cs_variants, aes(x = pos, y = pip), color = "red") +
  # Only the final theme()/ylab() settings are kept: an earlier theme() and
  # ylab("Estimated effect") were fully overridden by these and have been
  # removed.
  theme(
    text = element_text(size = 40),
    strip.text.y = element_text(size = 15, angle = 0.5),
    axis.text.x = element_text(size = 40),
    axis.title.x = element_text(size = 40)
  ) +
  xlab("Position") +
  ylab("Estimated\neffect")

# Load the fine-mapping contexts saved by section 1 and pass them through
# get_norosmap_contexts().
finempping_contexts <- get_norosmap_contexts(
  readRDS(paste0(gene_name, "_finemapping_contexts.rds"))
)

# ColocBoost figure with the selected QTL contexts added; this replaces the
# previous value of cb_ad.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  cohorts = finempping_contexts,
  gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# Interaction q-value of each variant in CLU_int_res.
# Fixes relative to the previous version: the title typo ("nalysis"), the
# x-axis label (the x aesthetic is variant_id, not a gene name), and a label
# that was supplied for a `size` aesthetic that is not mapped.
int_qvalue_plot <- ggplot(CLU_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(
    title = "qvalue for CLU csets in interaction association analysis",
    x = "Variant ID",
    y = "qvalue_interaction"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(color = "grey80"),
    legend.position = NULL,
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  ylim(0, 1)
# scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

# Display the plot, then save exactly this plot object rather than relying on
# ggsave()'s implicit "last plot".
int_qvalue_plot
ggsave(
  "plots/CLU/sec11.interaction_association_CLU_lessPIP25.pdf",
  plot = int_qvalue_plot,
  height = 5,
  width = 8
)

# Display three objects that are created outside the code visible here.
# NOTE(review): the names suggest plots for variants, APOE interaction and
# functional annotation -- confirm where they are built.
vars_p

apoe_p

func_p

options(repr.plot.width = 12, repr.plot.height = 6)

# PIP per gene for the trans fine-mapping table flat_var; when flat_var is
# NULL only a message is emitted.
if (!is.null(flat_var)) {
  trans_pip_plot <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
    geom_point(alpha = 0.7) +
    # The label previously given for `color` was dropped: no colour aesthetic
    # is mapped in this plot.
    labs(
      title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
      x = "Gene Name",
      y = "PIP",
      size = "PIP"
    ) +
    theme_minimal(base_size = 14) +
    # NOTE(review): legend.position = NULL appears to leave the legend at its
    # default placement; use "none" if the legend should be hidden.
    theme(
      panel.background = element_blank(),
      panel.grid.major = element_line(color = "grey80"),
      legend.position = NULL,
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

  # A plot created inside `if { }` is not auto-displayed, so print it
  # explicitly (as the mvSuSiE loop in this notebook does).
  print(trans_pip_plot)

  # Save into the per-gene plot directory derived from gene_name.
  ggsave(
    file.path("plots", gene_name, paste0("sec12.trans_fine_mapping_", gene_name, ".pdf")),
    plot = trans_pip_plot,
    height = 5,
    width = 8
  )
} else {
  message("There are no detectable trans signals for ", gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000120885	chr8	25080000	29360000	27614699	chr8:25007602-26225312,chr8:26225312-27515963,chr8:27515963-29469590	8_25007602-26225312,8_26225312-27515963,8_27515963-29469590	8_25007602-26225312,8_26225312_27515963,8_27515963_29469590	TADB_697,TADB_698	chr8_24141394_29490278,chr8_26235323_30360222	27614700	27596917	chr8:18313692-26141968,chr8:19197380-29490278,chr8:21042397-30360222,chr8:24141394-31767006,chr8:26235323-32220984,chr8:28333064-35039104	CLU

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
Ast; DLPFC; AC; AD_Bellenguez_2022	0.9548921	8	0.2272865	13988; 13987; 13985; 13989; 13981; 13976; 13955; 13990	chr8:27608798:T:C; chr8:27598736:T:C; chr8:27608664:T:C; chr8:27607795:T:C; chr8:27608640:T:C; chr8:27607412:A:G; chr8:27604964:A:G; chr8:27607002:T:C	chr8:27608798:T:C	coloc_sets:Y2_Y7_Y8_Y17:CS1

	variants	Ast	DLPFC	AC	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>
chr8:27608640:T:C	chr8:27608640:T:C	-4.827589	-6.646252	-6.438557	12.08434
chr8:27607795:T:C	chr8:27607795:T:C	-4.827589	-6.646252	-6.438557	12.07229
chr8:27607412:A:G	chr8:27607412:A:G	-4.827589	-6.638479	-6.444127	11.95238
chr8:27608664:T:C	chr8:27608664:T:C	-4.827589	-6.648388	-6.447515	11.91667
chr8:27607002:T:C	chr8:27607002:T:C	-4.783657	-6.612231	-6.438848	11.97590
chr8:27604964:A:G	chr8:27604964:A:G	-4.783657	-6.612231	-6.438848	11.96386
chr8:27598736:T:C	chr8:27598736:T:C	-4.935008	-6.738925	-6.316061	11.53012
chr8:27608798:T:C	chr8:27608798:T:C	-5.048886	-6.865846	-6.422296	0.00000

gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000012232	chr8	28600468	28600469	EXTL3	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000015592	chr8	27258419	27258420	STMN4	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000104228	chr8	27311271	27311272	TRIM35	BM_44_MSBB_eQTL,Exc_DeJager_eQTL,DLPFC_DeJager_eQTL,Exc_mega_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000104290	chr8	28494204	28494205	FZD3	BM_10_MSBB_eQTL,Oli_mega_eQTL
ENSG00000104299	chr8	28890241	28890242	INTS9	ROSMAP_PCC_sQTL
ENSG00000104756	chr8	25458475	25458476	KCTD9	ROSMAP_DLPFC_sQTL,STARNET_eQTL
ENSG00000104765	chr8	26383053	26383054	BNIP3L	MSBB_BM36_pQTL
ENSG00000120875	chr8	29350683	29350684	DUSP4	MiGA_SVZ_eQTL
ENSG00000120899	chr8	27311481	27311482	PTK2B	MiGA_GTS_eQTL,MiGA_SVZ_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL,STARNET_eQTL
ENSG00000120907	chr8	26867277	26867278	ADRA1A	AC_DeJager_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000120915	chr8	27490780	27490781	EPHX2	MSBB_BM36_pQTL,DLPFC_Bennett_pQTL,ROSMAP_PCC_sQTL
ENSG00000134014	chr8	28089672	28089673	ELP3	MiGA_SVZ_eQTL,MiGA_THA_eQTL,PCC_DeJager_eQTL,Exc_mega_eQTL
ENSG00000147419	chr8	27772652	27772653	CCDC25	MiGA_SVZ_eQTL,BM_22_MSBB_eQTL,ROSMAP_AC_sQTL
ENSG00000147421	chr8	28890394	28890395	HMBOX1	Inh_Kellis_eQTL
ENSG00000147437	chr8	25424653	25424654	GNRH1	MiGA_GTS_eQTL,MiGA_THA_eQTL,ROSMAP_AC_sQTL
ENSG00000147459	chr8	25184688	25184689	DOCK5	BM_22_MSBB_eQTL,DLPFC_Bennett_pQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000168077	chr8	27633867	27633868	SCARA3	Exc_DeJager_eQTL,OPC_Kellis_eQTL,Ast_mega_eQTL,Exc_mega_eQTL,ROSMAP_PCC_sQTL
ENSG00000168078	chr8	27838081	27838082	PBK	MiGA_SVZ_eQTL,MiGA_THA_eQTL
ENSG00000168079	chr8	27992672	27992673	SCARA5	ROSMAP_PCC_sQTL
ENSG00000171320	chr8	27771948	27771949	ESCO2	MiGA_GFM_eQTL
ENSG00000184661	chr8	25459198	25459199	CDCA2	STARNET_eQTL
ENSG00000186918	chr8	28402700	28402701	ZNF395	MiGA_THA_eQTL,Oli_Kellis_eQTL
ENSG00000189233	chr8	28083935	28083936	NUGGC	MiGA_SVZ_eQTL
ENSG00000197892	chr8	29263123	29263124	KIF13B	DLPFC_DeJager_eQTL,Oli_Kellis_eQTL,Oli_mega_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000214050	chr8	28490277	28490278	FBXO16	AC_DeJager_eQTL,Inh_mega_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000221818	chr8	26045412	26045413	EBF2	DLPFC_DeJager_eQTL,ROSMAP_AC_sQTL
ENSG00000221914	chr8	26291507	26291508	PPP2R2A	MiGA_GTS_eQTL,BM_10_MSBB_eQTL
ENSG00000240694	chr8	26514091	26514092	PNMA2	MiGA_SVZ_eQTL,DLPFC_DeJager_eQTL,Ast_Kellis_eQTL,Exc_Kellis_eQTL,Inh_Kellis_eQTL,Exc_mega_eQTL

Case study: CLU xQTL and AD GWAS¶

Overview¶

Computing environment setup¶

How to Use This Notebook¶

Section 0: Sanity check ¶

Check the basic information of the gene¶

Check the existing results which are inputs to this analysis¶

Section 1: Fine-mapping for xQTL and GWAS ¶

Section 2: Multi-context colocalization with Bellenguez 2022 ¶

Section 3: Refinement of colocalized loci with other AD GWAS ¶

Section 4: Assessment of multi-context xQTL effect sizes ¶

Option 1: ColocBoost + MASH¶

Option 2: mvSuSiE¶

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)¶

TWAS results¶

MR results¶

cTWAS results¶

Section 6: Context specific multi-gene fine-mapping ¶

A quick analysis: using the xQTL-AD summary table (flatten table)¶

A statistically solid approach: mvSuSiE multi-gene analysis¶

Section 7: Epigenomic QTL and their target regions ¶

Generate a crude plot to determine whether the story is interesting¶

Section 8: Context focused validation in other xQTL data ¶

Section 9: Non-linear effects of xQTL ¶

APOE interaction¶

Section 10: in silico functional studies in iPSC model ¶

Section 11: Functional annotations of selected loci ¶

Section 12: Candidate loci as trans-xQTL ¶

Creative thinking: generate hypothesis, search in literature, raise questions to discuss¶