# Shell commands (not R): install r-pecotmr and r-bedmatrix into the
# `r_libs` micromamba environment.
micromamba install -n r_libs r-pecotmr
micromamba install -n r_libs r-bedmatrix

# Load the project helper scripts, then every R source file found in the
# ColocBoost source directory.
source('../../codes/cb_plot.R')
source('../../codes/utilis.R')
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only files whose names end in ".R" are sourced (an unanchored ".R" matches
# any character followed by "R" anywhere in the file name).
for (file in list.files("/data/colocalization/colocboost/R",
                        pattern = "\\.R$", full.names = TRUE)) {
  source(file)
}
# Target gene for this notebook; defined once and reused below.
gene_name <- 'PICALM'

# Figure output directory. showWarnings = FALSE keeps re-runs quiet when the
# directory already exists.
dir.create(paste0('plots/', gene_name), recursive = TRUE, showWarnings = FALSE)

# Look up the gene record by name and display it.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Identifiers read from the gene record and reused by later cells.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

expression_in_rosmap_bulk(target_gene_info)

region_p

pip_p

# Read the stored ColocBoost result object for this gene.
cb_res_file <- paste0("/data/analysis_result/ColocBoost/2024_7/", gene_id, "_res.rds")
cb_res <- readRDS(cb_res_file)

# Manually add the "orange" locus as an extra colocalization set: its variant
# names, its variant indices, and the phenotypes it is attached to.
# NOTE(review): the phenotype entry is written to position 4 by index, while
# the two set entries are written by name -- confirm position 4 is the slot
# that belongs to this set.
fake_cset <- 'coloc_sets:Y1_Y2_Y18:CSfake'
cb_res$cb_z2z_noLD$coloc_results$csets_snp_names[[fake_cset]] <- c(
  "chr11:86157598:T:C",
  "chr11:86156833:A:G"
)
cb_res$cb_z2z_noLD$coloc_results$csets[[fake_cset]] <- c(4817, 4813)
cb_res$cb_z2z_noLD$coloc_results$phenotypes[[4]] <- c(
  'phenotype1',
  'phenotype2',
  'phenotype18'
)

# Summarise the (patched) result and store the summary next to the notebook.
cb_res_table <- get_cb_summary(cb_res)
saveRDS(cb_res_table, paste0(gene_name, "_colocboost_res.rds"))

# Build the ColocBoost figure from the result object.
cb <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2
)

# Replay the plot stored in `cb$p` on the active device. To export it, enable
# the two commented device calls around the replay.
#pdf('plots/PICALM/sec2.colocboost_res.pdf', width = 10, height = 5)
replayPlot(cb$p)
#dev.off()

# Show the summary table of colocalized variants.
cb_res_table

# Show the effect signs within each colocalization set.
get_effect_sign_csets(cb_res)

# Show the LD between colocalization sets.
get_between_purity_simple(
  cb_res,
  gene.name = gene_id,
  path = '/data/colocalization/QTL_data/eQTL/'
)

# AD GWAS cohorts to overlay on the ColocBoost figure.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)

# Same figure as before, with the GWAS cohorts added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Write the figure stored in `cb_ad$p` to a PDF under the gene's plot directory.
pdf(file.path('plots', gene_name, 'sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
# Close the device even when the replay fails; otherwise the PDF device stays
# open and later plots are drawn into it instead of the notebook.
invisible(tryCatch(replayPlot(cb_ad$p), finally = dev.off()))

get_constrast_plot(df_rosmap)

# Read one multi-context result file.
#   path   : location of the .rds file
#   cohort : cohort label used in the failure message
# Returns the object read from `path`, or NULL (after emitting a message) when
# reading fails.
read_multi_context <- function(path, cohort) {
    tryCatch(
        readRDS(path),
        error = function(e) {
            message('Error in loading ', cohort, ' multi context data')
            NULL
        }
    )
}

# TRUE when `res` is non-empty and its first element carries a non-NULL
# `mvsusie_fitted` entry. The length check keeps an empty list from failing
# on `[[1]]`.
has_mvsusie_fit <- function(res) {
    length(res) > 0 && !is.null(res[[1]]$mvsusie_fitted)
}

# ROSMAP
message("Multi context in ROSMAP data")

multi_context_rosmap_tmp <- read_multi_context(
    paste0('/data/analysis_result/multi_context/ROSMAP/mnm/ROSMAP_DeJager.',
           target_gene_info$gene_info$`#chr`, '_', gene_id, '.multicontext_bvsr.rds'),
    'ROSMAP'
)
if (has_mvsusie_fit(multi_context_rosmap_tmp)) {
    plot_and_save(multi_context_rosmap_tmp[[1]],
                  file.path('plots', gene_name, 'sec4.multi_context_ROSMAP'))
} else {
    message('Multi Context results are empty in ROSMAP data')
}

# MSBB
message("Multi context in MSBB data")

multi_context_msbb_tmp <- read_multi_context(
    paste0('/data/analysis_result/multi_context/MSBB/mnm/MSBB_eQTL.',
           target_gene_info$gene_info$`#chr`, '_', gene_id, '.multicontext_bvsr.rds'),
    'MSBB'
)
if (has_mvsusie_fit(multi_context_msbb_tmp)) {
    plot_and_save(multi_context_msbb_tmp[[1]],
                  file.path('plots', gene_name, 'sec4.multi_context_MSBB'))
} else {
    message('Multi Context results are empty in MSBB data')
}

# TWAS figure for the target gene.
plot_TWAS_res(gene_id = gene_id, gene_name = gene_name)

# Flattened multi-gene / multi-context table built from the exported summary file.
multigene_flat <- get_multigene_multicontext_flatten(
  'Fungen_xQTL_allQTL.overlapped.gwas.export.PICALM.rds',
  sQTL = 'no_MSBB'
)
multigene_flat

# Split the comma-separated TADB_id field into one window id per element.
sliding_windows <- as.character(
  unlist(strsplit(target_gene_info$gene_info$TADB_id, ','))
)
sliding_windows

# For every TAD window of the target gene, read each multi-gene result file
# found for that window, plot it when the target gene is among the fitted
# condition names, and collect every successfully read result in `mnm_gene`.
mnm_gene <- list()
# The TADB_id field can list the same window more than once; visit each window
# once so files are not read, plotted and appended twice.
for (window in unique(sliding_windows)) {
    window_files <- list.files('/data/analysis_result/multi_gene/ROSMAP/mnm_genes/',
                               pattern = window, full.names = TRUE)
    # Match the suffix literally; as a regex the dots would match any character.
    context_files <- window_files[str_detect(window_files, fixed('.multigene_bvrs.rds'))]
    for (context_file in context_files) {
        # Context label = file name up to the first dot.
        context_mnm <- str_split(basename(context_file), '[.]', simplify = TRUE)[, 1]
        # Load multi-gene data; report unreadable files instead of skipping them silently.
        mnm_gene_tmp <- tryCatch(
            readRDS(context_file),
            error = function(e) {
                message('Failed to read ', context_file, ': ', conditionMessage(e))
                NULL
            }
        )
        if (is.null(mnm_gene_tmp)) {
            next
        }

        # Check if target gene is in the condition names
        if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp[[1]]$mvsusie_fitted$condition_names) {
            # Use a common prefix format for multi-gene plots
            plot_and_save(mnm_gene_tmp[[1]], file.path('plots', gene_name, 'sec6.multigene'))
        } else {
            message('There is mnm result for TAD window ', window, ' in ', context_mnm,
                    ', but it does not include target gene ', gene_name, ' in CS.')
        }
        # Append to the results list
        mnm_gene <- c(mnm_gene, list(mnm_gene_tmp))
    }
}

# Stack the three panels in one column (order: p1, p3, p2), labelled A-C,
# with relative heights 2 : 1 : 3.
cowplot::plot_grid(
  plotlist = list(p1, p3, p2),
  ncol = 1,
  align = "v",
  axis = "tlbr",
  labels = c("A", "B", "C"),
  label_size = 45, # Adjust the label size as needed
  label_fontface = "bold",
  rel_heights = c(2, 1, 3)
)

# Fine-mapping contexts saved by section 1, passed through
# get_norosmap_contexts().
finempping_contexts <- get_norosmap_contexts(
  readRDS(paste0(gene_name, '_finemapping_contexts.rds'))
)

# PICALM only: add the MiGA GFM eQTL context to the list.
finempping_contexts <- c(as.character(finempping_contexts), 'MiGA_GFM_eQTL')

# ColocBoost figure with the selected QTL contexts added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  cohorts = finempping_contexts,
  gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

asso_p

# Linear model with interaction: expression_res ~ genotype_res * apoe4dose.
# To limit multiple testing, only the contexts/sets of interest are tested:
# Ast and Mic in the purple and green sets, which seem to show an interaction
# in TCW iPSC.
# (original note, apparently truncated: "we remove also dono")
picalmf<-picalm[context%in%c('Mic','Ast')&cs_name%in%c('purple',
                                                       'green')]

# APOE4 dose = number of '4' characters in the APOE genotype string.
picalmf[,apoe4dose:=str_count(as.character(apoe_genotype),'4')]
# NOTE(review): `AD` is computed with the same expression as `apoe4dose` and is
# not used below -- looks like a copy-paste; confirm what it should hold.
picalmf[,AD:=str_count(as.character(apoe_genotype),'4')]

# Count of specimens per cogdx value (one row per specimenID).
table(unique(picalmf,by='specimenID')$cogdx)


# Fit the model separately for each SNP / context / cs_name group and keep the
# coefficient table; the coefficient name is stored in column `cov`.
res_lm<-picalmf[,data.table(summary(lm(expression_res~genotype_res*apoe4dose))$coefficients,
                        keep.rownames = 'cov'),
       by=c('SNP','context','cs_name')]

# Adjusted p-value: the number of tests is the number of distinct
# (cs_name, context) pairs. p.adjust is called with its default method and
# `n = ntests`, within each SNP / context / coefficient group.
ntests=nrow(unique(res_lm,by=c('cs_name','context')))
res_lm[,padj:=p.adjust(`Pr(>|t|)`,n = ntests),by=c('SNP','context','cov')]

# Interaction terms with adjusted p-value below 0.1.
res_lm[padj<0.1][cov=='genotype_res:apoe4dose']


# Per context / set panel: -log10(p) for each coefficient, coloured by the
# estimate; after ordering by padj, the first row per (cs_name, context, cov)
# is kept. The dashed line marks p = 0.05.
ggplot(unique(res_lm[order(padj)],by=c('cs_name','context','cov')))+
  geom_col(aes(x=cov,y=-log10(`Pr(>|t|)`),fill=Estimate))+
facet_wrap(context~cs_name)+theme_bw()+scale_fill_gradient2(low = 'blue',high = 'red')+
  scale_x_discrete(guide = guide_axis(angle = 60))+geom_hline(yintercept = -log10(0.05),colour = 'grey',linetype='dashed')

# NOTE(review): the disabled export below refers to `res_lmf`, which is not
# defined in this cell (the table built here is `res_lm`).
#fwrite(res_lmf,fp(out,'res_lm_astromic_purple_green_qtl_interacAPOE.csv.gz'))


#fwrite(picalm,fp(out,'res_picalm_expr_css_interacAPOE.csv.gz'))

  1   2   3   4   5   6 
133 101   5 127  19  13

quant_coef_colocvar

vars_p

apoe_p

func_p

options(repr.plot.width=12, repr.plot.height=6)
# Dot plot of PIP per gene from `flat_var`, with point size also mapped to PIP.
# No colour aesthetic is mapped, so no colour label or colour scale is set.
# Inside aes(), `gene_name` is the column of `flat_var`; in the title and the
# file path it is the notebook-level target gene.
trans_pip_plot <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
  geom_point(alpha = 0.7) +
  labs(title = paste0("PIP values for trans fine mapped Genes in ", gene_name ," csets with AD"),
       x = "Gene Name",
       y = "PIP",
       size = "PIP") +
  theme_minimal(base_size = 14) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey80"),
        axis.text.x = element_text(angle = 45, hjust = 1))
# Display the figure in the notebook.
trans_pip_plot
# Save this exact plot object rather than relying on the implicit "last plot".
ggsave(file.path('plots', gene_name, paste0('sec12.trans_fine_mapping_', gene_name, '.pdf')),
       plot = trans_pip_plot, height = 5, width = 8)

%preview 'plots/PICALM/Mic_subtypes.png'

%preview 'plots/PICALM/PICALM_exp_Kellis_anno.png'

%preview 'plots/PICALM/PICALM_subtype_exp.png'

%preview 'plots/PICALM/PICALM_subtype_exp_visua.png'

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000073921	chr11	84957175	87360000	86069881	chr11:84267999-86714492,chr11:86714492-89332148	11_84267999-86714492,11_86714492-89332148	11_84267999-86714492,11_86714492_89332148	TADB_914,TADB_915,TADB_916,TADB_917,TADB_918	chr11_80821272_86627922,chr11_82455012_86627922,chr11_82455012_86627922,chr11_82455012_88330052,chr11_86037843_91192894	86069882	85957175	chr11:77324757-86627922,chr11:80552225-86627922,chr11:80821272-88330052,chr11:82455012-91192894,chr11:82455012-94812378,chr11:86037843-97507574	PICALM

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
AC; DLPFC_productive	NA	6	0.581679809	3851; 3834; 4027; 3993; 4333; 4064	chr11:85947843:TAGG:CAGG; chr11:85943544:A:G; chr11:85981138:A:C; chr11:85973892:T:C; chr11:86055977:A:G; chr11:85991934:T:C	chr11:85947843:TAGG:CAGG	coloc_sets:Y8_Y14:CS1
Mic; Ast; pQTL; AC_unproductive; DLPFC_unproductive; PCC_productive; AD_Bellenguez_2022	NA	14	0.428056556	4422; 4367; 4379; 4425; 4207; 4204; 4246; 4306; 4320; 4458; 4292; 4217; 4279; 3981	chr11:86076782:C:T; chr11:86063377:GTA:G; chr11:86065502:G:A; chr11:86077309:C:T; chr11:86031175:CA:C; chr11:86030088:C:T; chr11:86039629:A:T; chr11:86051202:C:T; chr11:86054256:G:A; chr11:86083718:C:T; chr11:86048986:T:C; chr11:86034995:C:T; chr11:86046547:C:T; chr11:85970540:A:G	chr11:86065502:G:A	coloc_sets:Y1_Y2_Y11_Y13_Y15_Y16_Y18:CS3
Mic; Monocyte; AD_Bellenguez_2022	NA	8	0.235014277	4791; 4766; 4786; 4781; 4758; 4767; 4771; 4773	chr11:86141937:G:A; chr11:86149263:G:A; chr11:86147455:G:T; chr11:86139201:C:T; chr11:86146597:T:C; chr11:86142209:T:G; chr11:86144030:ATCT:A; chr11:86144036:GC:G	chr11:86142209:T:G	coloc_sets:Y1_Y10_Y18:CS2
Mic; Ast; AD_Bellenguez_2022	NA	2	0.001129453	4817; 4813	chr11:86157598:T:C; chr11:86156833:A:G	chr11:86157598:T:C	coloc_sets:Y1_Y2_Y18:CSfake

	variants	AC	DLPFC_productive
	<chr>	<dbl>	<dbl>
chr11:85947843:TAGG:CAGG	chr11:85947843:TAGG:CAGG	5.319214	5.908326
chr11:85943544:A:G	chr11:85943544:A:G	5.240631	5.851143
chr11:85981138:A:C	chr11:85981138:A:C	5.153815	5.779653
chr11:85973892:T:C	chr11:85973892:T:C	5.065856	5.739538
chr11:86055977:A:G	chr11:86055977:A:G	5.139122	5.653113
chr11:85991934:T:C	chr11:85991934:T:C	5.099124	5.608884

	variants	Mic	Ast	pQTL	AC_unproductive	DLPFC_unproductive	PCC_productive	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
chr11:86076782:C:T	chr11:86076782:C:T	-10.52174	-4.517582	-3.818992	-10.89911	-8.859720	-2.478630	11.75000
chr11:86063377:GTA:G	chr11:86063377:GTA:G	-10.34026	-4.537517	-3.844662	-10.93803	-8.765682	-2.232304	11.58427
chr11:86065502:G:A	chr11:86065502:G:A	-10.34026	-4.537517	-3.844662	-10.94457	-8.689725	-2.199617	11.60227
chr11:86077309:C:T	chr11:86077309:C:T	-10.24246	-4.409258	-3.717093	-10.69015	-8.932100	-2.410550	11.69318
chr11:86031175:CA:C	chr11:86031175:CA:C	-10.34026	-4.537517	-3.844662	-10.88368	-8.797908	-2.238683	11.44944
chr11:86030088:C:T	chr11:86030088:C:T	-10.34026	-4.537517	-3.844662	-10.88368	-8.797908	-2.238683	11.44318
chr11:86039629:A:T	chr11:86039629:A:T	-10.38765	-4.467640	-3.817673	-10.88900	-8.666949	-2.392804	11.45455
chr11:86051202:C:T	chr11:86051202:C:T	-10.28062	-4.410642	-3.620332	-10.88368	-8.750821	-2.425912	11.54545
chr11:86054256:G:A	chr11:86054256:G:A	-10.23398	-4.480186	-3.647768	-10.88900	-8.728554	-2.205745	11.54545
chr11:86083718:C:T	chr11:86083718:C:T	-10.09243	-4.405057	-3.649974	-10.84648	-8.639717	-2.374147	11.60227
chr11:86048986:T:C	chr11:86048986:T:C	-10.10608	-4.358270	-3.715062	-10.67645	-8.816007	-2.357959	11.56322
chr11:86034995:C:T	chr11:86034995:C:T	-10.10608	-4.358270	-3.701529	-10.67645	-8.793389	-2.406776	11.49425
chr11:86046547:C:T	chr11:86046547:C:T	-10.05720	-4.428364	-3.742244	-10.67645	-8.870296	-2.171719	11.50575
chr11:85970540:A:G	chr11:85970540:A:G	-10.06227	-4.422811	-3.654206	-10.75134	-8.781144	-2.499121	11.24138

	variants	Mic	Monocyte	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>	<dbl>
chr11:86149263:G:A	chr11:86149263:G:A	-11.29765	5.599828	11.81481
chr11:86141937:G:A	chr11:86141937:G:A	-11.21284	5.514002	12.01235
chr11:86147455:G:T	chr11:86147455:G:T	-11.23201	5.599828	11.79012
chr11:86146597:T:C	chr11:86146597:T:C	-11.23201	5.598496	11.77778
chr11:86139201:C:T	chr11:86139201:C:T	-11.18821	5.394218	11.96296
chr11:86142209:T:G	chr11:86142209:T:G	-11.47646	5.511806	10.97561
chr11:86144030:ATCT:A	chr11:86144030:ATCT:A	-11.08686	5.512998	11.63881
chr11:86144036:GC:G	chr11:86144036:GC:G	-11.08686	5.512998	11.63831

Case study: PICALM xQTL and AD GWAS¶

Overview¶

Computing environment setup¶

How to Use This Notebook¶

Section 0: Sanity check ¶

Check the basic information of the gene¶

Check the existing results, which includes:¶

Section 1: Fine-mapping for xQTL and GWAS ¶

Section 2: Multi-context colocalization with Bellenguez 2022 ¶

Section 3: Refinement of colocalized loci with other AD GWAS ¶

Section 4: Assessment of multi-context xQTL effect sizes ¶

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)¶

TWAS results¶

MR results¶

cTWAS results¶

Section 6: Context specific multi-gene fine-mapping ¶

A quick analysis: using the xQTL-AD summary table (flatten table)¶

A statistically solid approach: mvSuSiE multi-gene analysis¶

Section 7: Epigenomic QTL and their target regions ¶

Generate a crude plot to determine whether the story is interesting¶

Section 8: Context focused validation in other xQTL data ¶

Section 9: Non-linear effects of xQTL ¶

APOE interaction¶

Quantile QTL analysis¶

Section 10: in silico functional studies in iPSC model ¶

Section 11: Functional annotations of selected loci ¶

Section 12: Candidate loci as trans-xQTL ¶

Creative thinking: generate hypothesis, search in literature, raise questions to discuss¶

coloc_csets_1	coloc_csets_2	min_abs_cor	max_abs_cor	median_abs_cor
coloc_sets:Y8_Y14:CS1	coloc_sets:Y1_Y2_Y11_Y13_Y15_Y16_Y18:CS3	0.269963679937246	0.316245737480089	0.285559647837661
coloc_sets:Y8_Y14:CS1	coloc_sets:Y1_Y10_Y18:CS2	0.386319652140945	0.45771188274784	0.406421060547751
coloc_sets:Y8_Y14:CS1	coloc_sets:Y1_Y2_Y18:CSfake	0.140379346854212	0.1603203091394	0.146601624689543
coloc_sets:Y1_Y2_Y11_Y13_Y15_Y16_Y18:CS3	coloc_sets:Y1_Y10_Y18:CS2	0.578864117010519	0.669086686252231	0.599914040978263
coloc_sets:Y1_Y2_Y11_Y13_Y15_Y16_Y18:CS3	coloc_sets:Y1_Y2_Y18:CSfake	0.775939867482711	0.799064783684259	0.783879863411505
coloc_sets:Y1_Y10_Y18:CS2	coloc_sets:Y1_Y2_Y18:CSfake	0.613986361545765	0.638588517356663	0.624090378592517

SNP	context	cs_name	cov	Estimate	Std. Error	t value	Pr(>\|t\|)	padj
<fct>	<chr>	<chr>	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
chr11:86141937:G:A	Ast	purple	genotype_res:apoe4dose	-0.1522615	0.05776645	-2.635812	0.008725144	0.03490057
chr11:86142209:T:G	Ast	purple	genotype_res:apoe4dose	-0.1441105	0.05833496	-2.470396	0.013918869	0.05567548
chr11:86144030:ATCT:A	Ast	purple	genotype_res:apoe4dose	-0.1398515	0.05807169	-2.408257	0.016487386	0.06594954
chr11:86144036:GC:G	Ast	purple	genotype_res:apoe4dose	-0.1398515	0.05807169	-2.408257	0.016487386	0.06594954
chr11:86146597:T:C	Ast	purple	genotype_res:apoe4dose	-0.1492720	0.05747814	-2.597023	0.009755636	0.03902254
chr11:86147455:G:T	Ast	purple	genotype_res:apoe4dose	-0.1492720	0.05747814	-2.597023	0.009755636	0.03902254
chr11:86149263:G:A	Ast	purple	genotype_res:apoe4dose	-0.1445153	0.05763470	-2.507437	0.012562496	0.05024999