micromamba install -n r_libs r-pecotmr

# Load the plotting and utility helpers used throughout this notebook.
source('../../codes/cb_plot.R')
source('../../codes/utilis.R')

# Source every R file of the local colocboost checkout.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored at the end of the name; the bare ".R" would also match any file
# whose name merely contains some character followed by "R".
for (colocboost_src in list.files("/data/colocalization/colocboost/R", pattern = "\\.R$", full.names = TRUE)) {
    source(colocboost_src)
}
# Gene analysed in this case study; later cells build file names from it.
gene_name <- 'GALNT6'

# Create the per-gene output directory for figures. Checking for existence
# first keeps the cell re-runnable without an "already exists" warning while
# still surfacing genuine creation failures.
if (!dir.exists(paste0('plots/', gene_name))) {
    dir.create(paste0('plots/', gene_name), recursive = TRUE)
}

# Fetch the annotation record for the gene under study and display it.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Identifiers reused by later cells, taken from the gene_info table.
gene_id <- target_gene_info$gene_info[["region_id"]]
chrom <- target_gene_info$gene_info[["#chr"]]

# utilis.R is sourced again here before the expression check is run.
source("../../codes/utilis.R")
expression_in_rosmap_bulk(target_gene_info)

# Display `region_p`. It is not defined in this part of the notebook --
# presumably a plot object created in an earlier section (TODO confirm).
region_p

# Display `pip_p`; likewise assumed to come from an earlier section (TODO confirm).
pip_p

# Read the stored ColocBoost result for this gene.
cb_res_path <- paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds")
cb_res <- readRDS(cb_res_path)

# Summarise the result and keep a copy of the summary next to the notebook.
cb_res_table <- get_cb_summary(cb_res)
cb_res_table_path <- paste0(gene_name, "_colocboost_res.rds")
saveRDS(cb_res_table, cb_res_table_path)

# Draw the ColocBoost result; `cb$p` holds the recorded plot replayed below.
cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Write the figure into the per-gene directory. The path is derived from
# `gene_name` so it always matches the directory created for this gene, and
# the device is closed even if replaying the plot fails.
pdf(paste0('plots/', gene_name, '/sec2.colocboost_res.pdf'), width = 10, height = 5)
invisible(tryCatch(
    replayPlot(cb$p),
    finally = dev.off()
))

# Table of colocalized variants
cb_res_table

# Effect sign for each colocalization set
get_effect_sign_csets(cb_res)

# LD between colocalization sets
get_between_purity_simple(
    cb_res,
    gene.name = gene_id,
    path = "/data/colocalization/QTL_data/eQTL/"
)

# AD GWAS cohorts overlaid on the ColocBoost plot.
AD_cohorts <- c(
    "AD_Jansen_2021",
    "AD_Bellenguez_EADB_2022",
    "AD_Bellenguez_EADI_2022",
    "AD_Kunkle_Stage1_2019",
    "AD_Wightman_Excluding23andMe_2021",
    "AD_Wightman_ExcludingUKBand23andME_2021",
    "AD_Wightman_Full_2021"
)

# Same plot call as before, now with the GWAS tracks switched on.
cb_ad <- plot_cb(
    cb_res = cb_res,
    cex.pheno = 1.5,
    x.phen = -0.2,
    add_gwas = TRUE,
    gene_id = gene_id,
    cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Save the ColocBoost + AD GWAS figure into the per-gene directory. The path
# is derived from `gene_name` rather than a hard-coded gene, and the device is
# closed even if replaying the plot fails.
pdf(paste0('plots/', gene_name, '/sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
invisible(tryCatch(
    replayPlot(cb_ad$p),
    finally = dev.off()
))

# Build the MASH plots for the gene under study (taken from `gene_name`
# instead of a hard-coded symbol) and print each one so it is rendered.
mash_p <- mash_plot(gene_name = gene_name)
for (mash_panel in mash_p) {
    print(mash_panel)
}

# TWAS results for the target gene.
plot_TWAS_res(gene_id = gene_id, gene_name = gene_name)

# Flattened multi-gene / multi-context table. The export file name is built
# from `gene_name` so it follows the gene selected at the top of the notebook.
multigene_flat <- get_multigene_multicontext_flatten(
    paste0('Fungen_xQTL_allQTL.overlapped.gwas.export.', gene_name, '.rds'),
    sQTL = 'no_MSBB'
)
multigene_flat

# Split the comma-separated sliding-window field into a character vector.
sliding_windows <- as.character(
    unlist(strsplit(target_gene_info$gene_info$sliding_windows, ","))
)
sliding_windows

# Collect the mvSuSiE multi-gene results for every sliding window of the
# target gene, plotting those whose fitted model lists the gene among its
# condition names. Windows whose result file cannot be read are skipped.
mnm_gene <- list()
for (window in sliding_windows) {
    # Best-effort read: a failure is reported and the window is skipped
    # instead of being dropped silently.
    mnm_gene_tmp <- tryCatch(
        readRDS(paste0('/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.', window, '.mnm.rds')),
        error = function(e) {
            message('Skipping sliding window ', window, ': ', conditionMessage(e))
            NULL
        }
    )
    if (is.null(mnm_gene_tmp)) {
        next
    }

    if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
        tryCatch({
            p <- mvsusieR::mvsusie_plot(mnm_gene_tmp$mvsusie_fitted, sentinel_only = FALSE, add_cs = TRUE)
            print(p)  # This ensures the plot is displayed in JupyterLab
        }, error = function(e) {
            # Keep going with the remaining windows, but report the failure.
            message('mvsusie_plot failed for sliding window ', window, ': ', conditionMessage(e))
        })
    } else {
        message('There is mnm result for sliding window ',window,', but not include target gene ', gene_name, ' in CS')
    }
    mnm_gene[[length(mnm_gene) + 1L]] <- mnm_gene_tmp
}

options(repr.plot.width = 40, repr.plot.height = 40)

# AD fine-mapping hits (credible-set members only), drawn as red points.
# The faceting columns are dropped from this layer's data.
ad_cs_points <- flatten_table %>%
    filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
    mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
    select(-study, -region, -cs_coverage_0.95)

# Crude overview: estimated effect curves and PIPs from `effect_of_interest`,
# faceted by credible set / study / region, with the target gene drawn as an
# arrow at y = 1.
# NOTE(review): the gene annotation is read from `target_gene_info`, the
# object created earlier in this notebook; the previous `tar_gene_info` is not
# defined anywhere in the visible code -- confirm this is the intended object.
ggplot() +
    theme_bw() +
    facet_grid(
        cs_coverage_0.95 + study + region ~ .,
        labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
        scales = "free_y"
    ) +
    # xlim(view_win) +
    # geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
    #           aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
    geom_line(
        data = effect_of_interest,
        aes(y = fun_plot, x = x, col = cs_coverage_0.95),
        size = 2
    ) +
    geom_point(
        data = effect_of_interest,
        aes(y = pip, x = pos, col = cs_coverage_0.95),
        size = 10
    ) +
    # A single theme()/ylab() pair: the earlier duplicates were fully
    # overridden by these settings and had no effect on the figure.
    theme(
        text = element_text(size = 40),
        strip.text.y = element_text(size = 15, angle = 0.5),
        axis.text.x = element_text(size = 40),
        axis.title.x = element_text(size = 40)
    ) +
    xlab("Position") +
    ylab("Estimated\neffect") +
    geom_segment(
        arrow = arrow(length = unit(1, "cm")),
        aes(x = gene_start, xend = gene_end, y = 1, yend = 1),
        size = 6,
        data = target_gene_info$gene_info,
        alpha = 0.3
    ) +
    geom_text(
        aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name),
        size = 10,
        data = target_gene_info$gene_info
    ) +
    geom_point(aes(x = pos, y = pip), color = "red", data = ad_cs_points)

# Load the fine-mapping contexts saved in sec1 and pass them through
# get_norosmap_contexts() before plotting.
finempping_contexts <- get_norosmap_contexts(
    readRDS(paste0(gene_name, "_finemapping_contexts.rds")) # from sec1
)

# ColocBoost plot with the QTL tracks for those contexts added.
# Note: this reassigns `cb_ad`.
cb_ad <- plot_cb(
    cb_res = cb_res,
    cex.pheno = 1.5,
    x.phen = -0.2,
    add_QTL = TRUE,
    cohorts = finempping_contexts,
    gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width=6, repr.plot.height=6)

# Interaction q-value per variant, on a fixed 0-1 y axis.
# Fixes relative to the earlier version: the title typo, the x-axis label
# (the x aesthetic is `variant_id`, not a gene name), removal of a label for
# the unmapped `size` aesthetic and of `legend.position = NULL` (a no-op;
# use "none" if a legend ever needs hiding).
interaction_p <- ggplot(GALNT6_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(title = "qvalue for GALNT6 csets in interaction association analysis",
       x = "Variant ID",
       y = "qvalue_interaction") +
  theme_minimal(base_size = 14) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey80"),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 1)
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))
interaction_p

# Pass the plot explicitly instead of relying on last_plot().
ggsave('plots/GALNT6/sec11.interaction_association_GALNT6_lessPIP25.pdf', interaction_p, height = 5, width = 8)

# Display `vars_p`, `apoe_p` and `func_p` in turn. None of them is defined in
# this part of the notebook -- presumably plot objects prepared elsewhere
# (TODO confirm where they are created).
vars_p

apoe_p

func_p

options(repr.plot.width=12, repr.plot.height=6)

# PIP per gene for the trans fine-mapping signals in `flat_var`; when
# `flat_var` is NULL only a message is emitted.
if (!is.null(flat_var)) {
    p <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
      geom_point(alpha = 0.7) +
      labs(title = paste0("PIP values for trans fine mapped Genes in ", gene_name ," csets with AD"),
           x = "Gene Name",
           y = "PIP",
           size = "PIP") +
      theme_minimal(base_size = 14) +
      theme(panel.background = element_blank(),
            panel.grid.major = element_line(color = "grey80"),
            axis.text.x = element_text(angle = 45, hjust = 1))
      # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

    # Save under this gene's own plot directory; the previous hard-coded
    # 'plots/CR1/' pointed at another gene's folder.
    ggsave(paste0('plots/', gene_name, '/sec12.trans_fine_mapping_', gene_name, '.pdf'), p, height = 5, width = 8)
    p
} else {
    message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000139629	chr12	48680000	52392867	51392866	chr12:47790772-48762437,chr12:48762437-51189586,chr12:51189586-52645289	12_47790772-48762437,12_48762437-51189586,12_51189586-52645289	12_47790772-48762437,12_48762437_51189586,12_51189586_52645289	TADB_968,TADB_969	chr12_47653211_53108261,chr12_50815042_54677408	51392867	51351247	chr12:41238446-49957487,chr12:42353866-53108261,chr12:44164185-54677408,chr12:47653211-57041806,chr12:50815042-58959148,chr12:52006281-61419293	GALNT6

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
Oli; DLPFC; AC; PCC; Monocyte; AD_Bellenguez_2022	1.000000	1	0.9996812	10602	chr12:51362485:T:C	chr12:51362485:T:C	coloc_sets:Y1_Y2_Y3_Y4_Y5_Y12:CS1
Oli; DLPFC; AC; PCC; Monocyte	0.983802	2	0.8964368	10715; 10708	chr12:51391617:A:AAGCCGC; chr12:51389636:T:A	chr12:51389636:T:A	coloc_sets:Y1_Y2_Y3_Y4_Y5:CS2

	variants	Oli	DLPFC	AC	PCC	Monocyte	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
chr12:51362485:T:C	chr12:51362485:T:C	-19.06122	-22.94871	-24.94123	-16.17026	-4.208866	4.43617

	variants	Oli	DLPFC	AC	PCC	Monocyte
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
chr12:51391617:A:AAGCCGC	chr12:51391617:A:AAGCCGC	5.231128	8.410379	7.724888	4.454380	3.850570
chr12:51389636:T:A	chr12:51389636:T:A	5.392984	8.619879	7.379238	4.261449	3.650899

gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000050405	chr12	50283545	50283546	LIMA1	MiGA_THA_eQTL
ENSG00000050426	chr12	51047961	51047962	LETMD1	MiGA_GFM_eQTL,MiGA_SVZ_eQTL,ROSMAP_PCC_sQTL
ENSG00000050438	chr12	51391316	51391317	SLC4A8	MiGA_SVZ_eQTL,Oli_DeJager_eQTL,Oli_Kellis_eQTL,Oli_mega_eQTL,DLPFC_Bennett_pQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,STARNET_eQTL
ENSG00000061273	chr12	47833131	47833132	HDAC7	MiGA_SVZ_eQTL
ENSG00000066084	chr12	50504984	50504985	DIP2B	MiGA_SVZ_eQTL,MiGA_THA_eQTL,BM_36_MSBB_eQTL,MSBB_BM36_pQTL
ENSG00000066117	chr12	50085199	50085200	SMARCD1	MiGA_GTS_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000079387	chr12	48106078	48106079	SENP1	ROSMAP_DLPFC_sQTL
ENSG00000086159	chr12	49967238	49967239	AQP6	MiGA_GTS_eQTL
ENSG00000110844	chr12	49568217	49568218	PRPF40B	Ast_DeJager_eQTL,Oli_DeJager_eQTL,Exc_DeJager_eQTL,Inh_DeJager_eQTL,Oli_Kellis_eQTL,OPC_Kellis_eQTL,Exc_Kellis_eQTL,Inh_Kellis_eQTL,Ast_mega_eQTL,Exc_mega_eQTL,Inh_mega_eQTL,OPC_mega_eQTL,Oli_mega_eQTL
ENSG00000110881	chr12	50057547	50057548	ASIC1	MiGA_GFM_eQTL,AC_DeJager_eQTL,Oli_mega_eQTL
ENSG00000110911	chr12	51028565	51028566	SLC11A2	MiGA_GTS_eQTL,Ast_DeJager_eQTL,Exc_Kellis_eQTL,Exc_mega_eQTL
ENSG00000110934	chr12	51324667	51324668	BIN2	MiGA_GFM_eQTL,Mic_DeJager_eQTL
ENSG00000111057	chr12	52948870	52948871	KRT18	MiGA_SVZ_eQTL
ENSG00000111371	chr12	46270016	46270017	SLC38A1	ROSMAP_PCC_sQTL
ENSG00000123268	chr12	50763709	50763710	ATF1	MiGA_GTS_eQTL,ROSMAP_AC_sQTL
ENSG00000123349	chr12	53295290	53295291	PFDN5	MiGA_SVZ_eQTL
ENSG00000123352	chr12	49366583	49366584	SPATS2	MiGA_GTS_eQTL,BM_44_MSBB_eQTL,Oli_Kellis_eQTL,Exc_mega_eQTL,Oli_mega_eQTL
ENSG00000123358	chr12	52022831	52022832	NR4A1	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000123395	chr12	52069245	52069246	ATG101	STARNET_eQTL
ENSG00000123416	chr12	49131396	49131397	TUBA1B	STARNET_eQTL
ENSG00000125084	chr12	48978321	48978322	WNT1	MiGA_THA_eQTL
ENSG00000129315	chr12	48716997	48716998	CCNT1	MiGA_GFM_eQTL,MiGA_SVZ_eQTL,ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000135390	chr12	53677407	53677408	ATP5MC2	ROSMAP_DLPFC_sQTL
ENSG00000135451	chr12	49323235	49323236	TROAP	MiGA_GTS_eQTL
ENSG00000135457	chr12	51173134	51173135	TFCP2	MiGA_THA_eQTL
ENSG00000135472	chr12	49904216	49904217	FAIM2	MiGA_GFM_eQTL,MiGA_GTS_eQTL,Oli_DeJager_eQTL,Oli_Kellis_eQTL,Inh_Kellis_eQTL,Oli_mega_eQTL,ROSMAP_AC_sQTL
ENSG00000135503	chr12	51951698	51951699	ACVR1B	MiGA_GTS_eQTL,MiGA_THA_eQTL,Exc_DeJager_eQTL,Inh_DeJager_eQTL,ROSMAP_DLPFC_sQTL,STARNET_eQTL
ENSG00000135519	chr12	49539029	49539030	KCNH3	ROSMAP_PCC_sQTL
ENSG00000139537	chr12	48904109	48904110	CCDC65	MiGA_SVZ_eQTL,Ast_DeJager_eQTL,Exc_DeJager_eQTL,Inh_DeJager_eQTL,Oli_Kellis_eQTL,Exc_mega_eQTL
ENSG00000139549	chr12	49094800	49094801	DHH	MiGA_SVZ_eQTL
...	...	...	...	...	...
ENSG00000161800	chr12	50033135	50033136	RACGAP1	MiGA_SVZ_eQTL,Oli_Kellis_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000161813	chr12	50392382	50392383	LARP4	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000161835	chr12	52006945	52006946	TAMALIN	MiGA_SVZ_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000167528	chr12	48351413	48351414	ZNF641	MiGA_SVZ_eQTL
ENSG00000167548	chr12	49060793	49060794	KMT2D	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000167550	chr12	49070024	49070025	RHEBL1	MiGA_THA_eQTL
ENSG00000167553	chr12	49188735	49188736	TUBA1C	Knight_eQTL
ENSG00000167612	chr12	51888008	51888009	ANKRD33	MiGA_GTS_eQTL
ENSG00000167767	chr12	52192013	52192014	KRT80	BM_22_MSBB_eQTL
ENSG00000169884	chr12	48971734	48971735	WNT10B	MiGA_GTS_eQTL,BM_10_MSBB_eQTL,BM_22_MSBB_eQTL
ENSG00000170421	chr12	52949919	52949920	KRT8	MiGA_SVZ_eQTL,MiGA_THA_eQTL
ENSG00000170523	chr12	52321397	52321398	KRT83	BM_10_MSBB_eQTL,BM_22_MSBB_eQTL,DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000170545	chr12	51270414	51270415	SMAGP	DLPFC_DeJager_eQTL
ENSG00000170653	chr12	53626409	53626410	ATF7	ROSMAP_DLPFC_sQTL
ENSG00000174233	chr12	48789088	48789089	ADCY6	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000174243	chr12	48852841	48852842	DDX23	Ast_DeJager_eQTL,ROSMAP_PCC_sQTL
ENSG00000177627	chr12	48482497	48482498	C12orf54	MiGA_SVZ_eQTL
ENSG00000178401	chr12	49346887	49346888	DNAJC22	BM_10_MSBB_eQTL,DLPFC_DeJager_eQTL
ENSG00000178449	chr12	50112081	50112082	COX14	MiGA_THA_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000181418	chr12	48999374	48999375	DDN	ROSMAP_PCC_sQTL
ENSG00000182544	chr12	53251250	53251251	MFSD5	MiGA_THA_eQTL
ENSG00000183283	chr12	51238723	51238724	DAZAP2	ROSMAP_DLPFC_sQTL
ENSG00000184271	chr12	51217707	51217708	POU6F1	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000185432	chr12	50923471	50923472	METTL7A	MiGA_SVZ_eQTL,MiGA_THA_eQTL,Oli_DeJager_eQTL,Oli_mega_eQTL
ENSG00000186666	chr12	49843105	49843106	BCDIN3D	MiGA_GFM_eQTL,DLPFC_DeJager_eQTL
ENSG00000187778	chr12	49568144	49568145	MCRS1	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000196876	chr12	51590265	51590266	SCN8A	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000205352	chr12	53441677	53441678	PRR13	MiGA_SVZ_eQTL,BM_44_MSBB_eQTL
ENSG00000205426	chr12	52291533	52291534	KRT81	BM_36_MSBB_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000272822	chr12	48957364	48957365	AC073610.1	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL

Case study: GALNT6 xQTL and AD GWAS¶

Overview¶

Computing environment setup¶

How to Use This Notebook¶

Section 0: Sanity check ¶

Check the basic information of the gene¶

Check the existing results which are inputs to this analysis¶

Section 1: Fine-mapping for xQTL and GWAS ¶

Section 2: Multi-context colocalization with Bellenguez 2022 ¶

Section 3: Refinement of colocalized loci with other AD GWAS ¶

Section 4: Assessment of multi-context xQTL effect sizes ¶

Option 1: ColocBoost + MASH¶

Option 2: mvSuSiE¶

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)¶

TWAS results¶

MR results¶

cTWAS results¶

Section 6: Context specific multi-gene fine-mapping ¶

A quick analysis: using the xQTL-AD summary table (flatten table)¶

A statistically solid approach: mvSuSiE multi-gene analysis¶

Section 7: Epigenomic QTL and their target regions ¶

Generate a crude plot to determine whether the story is interesting¶

Section 8: Context focused validation in other xQTL data ¶

Section 9: Non-linear effects of xQTL ¶

APOE interaction¶

Section 10: in silico functional studies in iPSC model ¶

Section 11: Functional annotations of selected loci ¶

Section 12: Candidate loci as trans-xQTL ¶

Creative thinking: generate hypothesis, search in literature, raise questions to discuss¶