# Environment setup (shell commands, not R code): install the r-pecotmr and
# r-bedmatrix packages into the `r_libs` micromamba environment.
micromamba install -n r_libs r-pecotmr
micromamba install -n r_libs r-bedmatrix

# Load the project helper scripts used throughout this notebook.
source('/data/interactive_analysis/rf2872/codes/cb_plot.R')
source('/data/interactive_analysis/rf2872/codes/utilis.R')

# Source every R script found in the local colocboost checkout.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored at the end of the file name; an unescaped ".R" would also pick up
# names such as "notes.Rmd" or "aRb.txt".
for (file in list.files("/data/colocalization/colocboost/R", pattern = "\\.R$", full.names = TRUE)) {
  source(file)
}
# Gene symbol under study; output locations below are derived from it.
gene_name <- 'TSPAN14'

# Create the per-gene figure directory only when it is missing, so that
# re-running this cell does not raise an "already exists" warning while real
# creation failures are still reported.
plot_dir <- paste0('plots/', gene_name)
if (!dir.exists(plot_dir)) {
  dir.create(plot_dir, recursive = TRUE)
}

# Look up the gene's annotation record and display it.
# NOTE(review): get_gene_info() is not defined in this excerpt — presumably it
# comes from one of the sourced helper scripts; confirm.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Region identifier and chromosome taken from the annotation record.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# utilis.R is sourced again before use (it was already loaded above);
# kept as in the original so edits to the helper file are picked up.
source('/data/interactive_analysis/rf2872/codes/utilis.R')
expression_in_rosmap_bulk(target_gene_info)

# Display two objects by evaluating their names at top level.
# NOTE(review): region_p and pip_p are not defined anywhere in the visible
# code — presumably plot objects created by an earlier section or a sourced
# helper; confirm before running this cell on its own.
region_p

pip_p

# Load the stored ColocBoost result for this gene's region.
cb_res <- readRDS(paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds"))

# Summarise the ColocBoost result and save the summary next to the notebook.
cb_res_table <- get_cb_summary(cb_res)

saveRDS(cb_res_table, paste0(gene_name, "_colocboost_res.rds"))

cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Write the recorded plot to the per-gene figure directory. The path is built
# from gene_name (the directory created earlier uses the same variable) rather
# than a hard-coded gene symbol, and the device is closed even when replaying
# the plot fails so no PDF device is left open.
pdf(paste0('plots/', gene_name, '/sec2.colocboost_res.pdf'), width = 10, height = 5)
tryCatch(replayPlot(cb$p), finally = dev.off())

# colocalized variants
cb_res_table

# effect sign for each coloc sets
get_effect_sign_csets(cb_res)

# LD between coloc sets
get_between_purity_simple(cb_res, gene.name = gene_id, path = '/data/colocalization/QTL_data/eQTL/')

# AD GWAS studies to overlay on the ColocBoost plot.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)

# Same plot call as for the plain ColocBoost figure, with the GWAS cohorts added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  gene_id = gene_id,
  cohorts = AD_cohorts,
  add_gwas = TRUE,
  cex.pheno = 1.5,
  x.phen = -0.2
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Save the recorded plot with the AD GWAS cohorts. The output path follows
# gene_name instead of a hard-coded gene symbol, and the PDF device is closed
# even if replaying the plot raises an error.
pdf(paste0('plots/', gene_name, '/sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
tryCatch(replayPlot(cb_ad$p), finally = dev.off())

plot_TWAS_res(gene_id = gene_id)

# Flattened multi-gene / multi-context table for this gene. The input file
# name is built from gene_name so it stays in sync with the gene chosen at the
# top of the notebook instead of repeating the symbol as a literal.
multigene_flat <- get_multigene_multicontext_flatten(
  paste0('Fungen_xQTL_allQTL.overlapped.gwas.export.', gene_name, '.rds'),
  sQTL = 'no_MSBB'
)
multigene_flat

# The annotation stores the sliding windows as one comma-separated string;
# split it into a character vector of window identifiers. The comma is matched
# literally (fixed = TRUE), and base R is used so no pipe operator is required.
sliding_windows <- as.character(
  unlist(strsplit(target_gene_info$gene_info$sliding_windows, ',', fixed = TRUE))
)
sliding_windows

# Collect the multi-gene mvSuSiE result of every sliding window that has one,
# and plot the fit for windows whose conditions include the target gene.
mnm_gene <- list()
for (window in sliding_windows) {
  mnm_path <- paste0('/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.', window, '.mnm.rds')

  # A window without a result file is simply skipped.
  if (!file.exists(mnm_path)) {
    next
  }

  # A file that exists but cannot be read is reported rather than hidden,
  # then skipped like a missing one.
  mnm_gene_tmp <- tryCatch(
    readRDS(mnm_path),
    error = function(e) {
      message('Could not read ', mnm_path, ': ', conditionMessage(e))
      NULL
    }
  )
  if (is.null(mnm_gene_tmp)) {
    next
  }

  if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
    # Plotting stays best-effort, but a failure is reported instead of
    # disappearing silently.
    tryCatch({
      p <- mvsusieR::mvsusie_plot(mnm_gene_tmp$mvsusie_fitted, sentinel_only = FALSE, add_cs = TRUE)
      print(p)  # This ensures the plot is displayed in JupyterLab
    }, error = function(e) {
      message('mvsusie_plot failed for sliding window ', window, ': ', conditionMessage(e))
    })
  } else {
    message('There is mnm result for sliding window ',window,', but not include target gene ', gene_name, ' in CS')
  }

  # Keep every successfully loaded result, whether or not it was plotted.
  mnm_gene[[length(mnm_gene) + 1L]] <- mnm_gene_tmp
}

# Large canvas for the stacked facet plot in the notebook front end.
options(repr.plot.width = 40, repr.plot.height = 40)

# One facet row per credible set / study / region combination, showing the
# effect curve (line) and PIPs (points) of `effect_of_interest`, the target
# gene as an arrow at y = 1, and AD fine-mapped variants in red.
# NOTE(review): effect_of_interest and flatten_table are not defined in the
# visible code — presumably created in an earlier cell; confirm.
# Changes from the previous version:
#   * `scales` is spelled out (it was `scale`, which only worked through
#     partial argument matching);
#   * the first theme()/ylab() pair was removed because the later theme() and
#     ylab() calls override every setting they made;
#   * aes() replaces the deprecated aes_string(); the column names are valid
#     R names, so the mappings are unchanged;
#   * the gene annotation is read from target_gene_info, the object created
#     earlier in this notebook (the visible code never defines tar_gene_info).
ggplot() +
  theme_bw() +
  facet_grid(
    cs_coverage_0.95 + study + region ~ .,
    labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
    scales = "free_y"
  ) +
  # xlim(view_win) +
  # geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
  #           aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  geom_line(
    data = effect_of_interest,
    aes(y = fun_plot, x = x, col = cs_coverage_0.95),
    size = 2
  ) +
  geom_point(
    data = effect_of_interest,
    aes(y = pip, x = pos, col = cs_coverage_0.95),
    size = 10
  ) +
  theme(
    text = element_text(size = 40),
    strip.text.y = element_text(size = 15, angle = 0.5),
    axis.text.x = element_text(size = 40),
    axis.title.x = element_text(size = 40)
  ) +
  xlab("Position") +
  ylab("Estimated\neffect") +
  geom_segment(
    aes(x = gene_start, xend = gene_end, y = 1, yend = 1),
    data = target_gene_info$gene_info,
    arrow = arrow(length = unit(1, "cm")),
    size = 6,
    alpha = 0.3
  ) +
  geom_text(
    aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name),
    data = target_gene_info$gene_info,
    size = 10
  ) +
  # The facet columns are dropped from the AD layer's data, so these points
  # are drawn in every facet row.
  geom_point(
    aes(x = pos, y = pip),
    color = "red",
    data = flatten_table %>%
      filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
      mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
      select(-study, -region, -cs_coverage_0.95)
  )

# Contexts saved by the fine-mapping step (sec1) for this gene.
finempping_contexts <- readRDS(
  paste0(gene_name, '_finemapping_contexts.rds')
)

# Pass the contexts through get_norosmap_contexts() before plotting.
finempping_contexts <- get_norosmap_contexts(finempping_contexts)

# ColocBoost plot with the remaining QTL contexts added.
cb_ad <- plot_cb(
  cb_res = cb_res,
  gene_id = gene_id,
  cohorts = finempping_contexts,
  add_QTL = TRUE,
  cex.pheno = 1.5,
  x.phen = -0.2
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# q-value of the interaction association test for each variant.
# NOTE(review): TSPAN14_int_res is not defined in the visible code and its name
# is gene-specific — confirm it matches gene_name when analysing another gene.
# Changes from the previous version:
#   * title typo fixed ("nalysis" -> "analysis") and gene symbol taken from
#     gene_name;
#   * the x axis shows variant_id, so it is labelled "Variant ID" instead of
#     "Gene Name";
#   * the `size` label was dropped because no size aesthetic is mapped;
#   * the plot is stored and passed to ggsave() explicitly rather than relying
#     on the implicit last plot, and the output directory follows gene_name.
int_assoc_plot <- ggplot(TSPAN14_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(title = paste0("qvalue for ", gene_name, " csets in interaction association analysis"),
       x = "Variant ID",
       y = "qvalue_interaction") +
  theme_minimal(base_size = 14) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey80"),
        legend.position = NULL,
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 1)
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

# Show the figure in the notebook, then write it to the per-gene directory.
print(int_assoc_plot)
ggsave(paste0('plots/', gene_name, '/sec11.interaction_association_', gene_name, '_lessPIP25.pdf'),
       plot = int_assoc_plot, height = 5, width = 8)

# Display objects by evaluating their names at top level.
# NOTE(review): vars_p, apoe_p and func_p are not defined in the visible code —
# presumably plot objects from earlier cells or sourced helpers; confirm.
vars_p

apoe_p

# FIXME: placeholder left between the apoe_p and func_p displays — the content
# intended here is still missing. Kept as a comment because a bare `FIXME` is
# evaluated as an undefined symbol and stops the run with an error.

func_p

options(repr.plot.width = 12, repr.plot.height = 6)

# PIP of each trans fine-mapped gene; skipped with a message when there is no
# trans signal (flat_var is NULL).
# NOTE(review): flat_var is not defined in the visible code — confirm its source.
# Changes from the previous version:
#   * the plot is stored and printed explicitly: a ggplot built inside an
#     `if` block is not auto-printed, so it was saved but never displayed;
#   * ggsave() receives the plot explicitly and the whole output path follows
#     gene_name (the directory part was a hard-coded gene symbol);
#   * the `color` label was dropped because no colour aesthetic is mapped.
if (!is.null(flat_var)) {
    trans_pip_plot <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
      geom_point(alpha = 0.7) +
      labs(title = paste0("PIP values for trans fine mapped Genes in ", gene_name ," csets with AD"),
           x = "Gene Name",
           y = "PIP",
           size = "PIP") +
      theme_minimal(base_size = 14) +
      theme(panel.background = element_blank(),
            panel.grid.major = element_line(color = "grey80"),
            legend.position = NULL,
            axis.text.x = element_text(angle = 45, hjust = 1))
      # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))
    print(trans_pip_plot)
    ggsave(paste0('plots/', gene_name, '/sec12.trans_fine_mapping_', gene_name, '.pdf'),
           plot = trans_pip_plot, height = 5, width = 8)
} else {
    message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000108219	chr10	79454165	84120000	80454165	chr10:79309511-80126158,chr10:80126158-82231647,chr10:82231647-83966001,chr10:83966001-85526873	10_79309511-80126158,10_80126158-82231647,10_82231647-83966001,10_83966001-85526873	10_79309511-80126158,10_80126158_82231647,10_82231647_83966001,10_83966001_85526873	TADB_849,TADB_850	chr10_76790791_81245367,chr10_78526508_85081995	80454166	80533124	chr10:74243567-81245367,chr10:75869601-85081995,chr10:76790791-88435035,chr10:78526508-91908553,chr10:83138420-94806272	TSPAN14

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
AC; AC_unproductive; DLPFC_unproductive	1.0000000	1	0.9997845	7181	chr10:80512871:TTTGTTGTTGTTGTTG:TTTG	chr10:80512871:TTTGTTGTTGTTGTTG:TTTG	coloc_sets:Y7_Y11_Y12:CS1
Exc; DLPFC; AC; PCC; DLPFC_unproductive; PCC_unproductive; AD_Bellenguez_2022	0.9862626	10	0.1535453	7165; 7099; 7098; 7103; 7132; 7163; 7090; 7159; 7184; 7200	chr10:80513323:T:C; chr10:80510092:C:G; chr10:80520381:T:G; chr10:80489996:G:A; chr10:80509855:A:G; chr10:80491788:C:A; chr10:80492013:G:A; chr10:80493945:C:A; chr10:80497814:T:C; chr10:80508189:A:C	chr10:80513323:T:C	coloc_sets:Y4_Y6_Y7_Y8_Y12_Y13_Y14:CS2

	variants	AC	AC_unproductive	DLPFC_unproductive
	<chr>	<dbl>	<dbl>	<dbl>
chr10:80512871:TTTGTTGTTGTTGTTG:TTTG	chr10:80512871:TTTGTTGTTGTTGTTG:TTTG	13.08519	-8.534175	-12.50158

	variants	Exc	DLPFC	AC	PCC	DLPFC_unproductive	PCC_unproductive	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
chr10:80510092:C:G	chr10:80510092:C:G	8.085155	7.515560	13.73716	12.91315	-12.27762	-8.827435	7.611650
chr10:80492013:G:A	chr10:80492013:G:A	8.085155	7.512116	13.76660	12.87399	-12.26355	-8.706989	7.676471
chr10:80491788:C:A	chr10:80491788:C:A	8.085155	7.512116	13.76660	12.87399	-12.26355	-8.706989	7.656863
chr10:80493945:C:A	chr10:80493945:C:A	8.085155	7.512116	13.76660	12.87399	-12.26355	-8.706989	7.656863
chr10:80497814:T:C	chr10:80497814:T:C	8.085155	7.512116	13.76660	12.87399	-12.26355	-8.706989	7.607843
chr10:80509855:A:G	chr10:80509855:A:G	8.085155	7.495725	13.73716	12.91315	-12.16287	-8.827435	7.524272
chr10:80489996:G:A	chr10:80489996:G:A	8.085155	7.519246	13.85109	12.85209	-12.06513	-8.847074	7.441176
chr10:80508189:A:C	chr10:80508189:A:C	8.085155	7.471587	13.73716	12.91315	-12.11940	-8.827435	7.419048
chr10:80513323:T:C	chr10:80513323:T:C	8.089445	7.646292	13.89353	13.08764	-12.67859	-8.730885	0.000000
chr10:80520381:T:G	chr10:80520381:T:G	7.992847	7.579862	13.54425	13.02695	-12.60627	-8.521531	0.000000

gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000108179	chr10	79347468	79347469	PPIF	MiGA_SVZ_eQTL
ENSG00000122359	chr10	80205571	80205572	ANXA11	Knight_eQTL,BM_36_MSBB_eQTL,BM_44_MSBB_eQTL,MSBB_BM36_pQTL,Ast_DeJager_eQTL,Exc_DeJager_eQTL,DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL,Ast_Kellis_eQTL,Exc_Kellis_eQTL,Ast_mega_eQTL,Exc_mega_eQTL,DLPFC_Bennett_pQTL,ROSMAP_DLPFC_sQTL
ENSG00000122378	chr10	80407828	80407829	PRXL2A	MiGA_GTS_eQTL,MiGA_SVZ_eQTL,ROSMAP_DLPFC_sQTL
ENSG00000133661	chr10	79982613	79982614	SFTPD	MiGA_GTS_eQTL,ROSMAP_AC_sQTL
ENSG00000133665	chr10	80344744	80344745	DYDC2	MiGA_SVZ_eQTL,PCC_DeJager_eQTL
ENSG00000133678	chr10	80078645	80078646	TMEM254	MiGA_SVZ_eQTL,MiGA_THA_eQTL,AC_DeJager_eQTL
ENSG00000138326	chr10	78033759	78033760	RPS24	MiGA_THA_eQTL
ENSG00000148600	chr10	84194536	84194537	CDHR1	ROSMAP_PCC_sQTL
ENSG00000185303	chr10	79560406	79560407	SFTPA2	BM_44_MSBB_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL
ENSG00000185737	chr10	81875193	81875194	NRG3	Knight_eQTL,MiGA_SVZ_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000188199	chr10	79703226	79703227	NUTM2B	AC_DeJager_eQTL

Case study: TSPAN14 xQTL and AD GWAS

Overview

Computing environment setup

How to Use This Notebook

Section 0: Sanity check

Check the basic information of the gene

Check the existing results which are inputs to this analysis

Section 1: Fine-mapping for xQTL and GWAS

Section 2: Multi-context colocalization with Bellenguez 2022

Section 3: Refinement of colocalized loci with other AD GWAS

Section 4: Assessment of multi-context xQTL effect sizes

Option 1: ColocBoost + MASH

Option 2: mvSuSiE

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)

TWAS results

MR results

cTWAS results

Section 6: Context specific multi-gene fine-mapping

A quick analysis: using the xQTL-AD summary table (flatten table)

A statistically solid approach: mvSuSiE multi-gene analysis

Section 7: Epigenomic QTL and their target regions

Generate a crude plot to determine whether the story is interesting

Section 8: Context focused validation in other xQTL data

Section 9: Non-linear effects of xQTL

APOE interaction

Section 10: in silico functional studies in iPSC model

Section 11: Functional annotations of selected loci

Section 12: Candidate loci as trans-xQTL

Creative thinking: generate hypothesis, search in literature, raise questions to discuss