# Shell command (not R): install the r-pecotmr package into the `r_libs` micromamba environment.
micromamba install -n r_libs r-pecotmr

# If an error occurs while sourcing scripts, it might be because your get() returned NULL.
# Please restart the kernel or click the R kernel in the upper right corner to resolve the issue.

# Load the project's plotting and utility helper scripts.
source('../../codes/cb_plot.R')
source('../../codes/utilis.R')

# Source every R script found under /data/colocalization/colocboost/R.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored at the end of the name; the unanchored ".R" also matched names such
# as "notes.Rmd" or "cache.RData" and tried to source them.
for (file in list.files("/data/colocalization/colocboost/R", pattern = "\\.R$", full.names = TRUE)) {
  source(file)
}
# Gene analysed in this notebook; used below for lookups and output file names.
gene_name <- 'NECTIN2'

# Create the per-gene plot directory only when it is missing, so re-running the
# cell does not raise an "already exists" warning while genuine creation
# failures are still reported.
if (!dir.exists(file.path('plots', gene_name))) {
  dir.create(file.path('plots', gene_name), recursive = TRUE)
}

# Look up the gene's metadata (helper defined outside this file) and display it.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Region identifier and chromosome read from the gene_info table.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# Re-source the shared utilities (the same script is already sourced earlier in
# this file) — presumably to pick up edits made to it; TODO confirm it is needed.
source('../../codes/utilis.R')
# Call expression_in_rosmap_bulk() (defined outside this file) on the gene info.
expression_in_rosmap_bulk(target_gene_info)

# Display `region_p`. It is not assigned anywhere in the visible code —
# presumably created by a sourced helper or an earlier step; verify before running.
region_p

# Display `pip_p`; likewise not assigned in the visible code.
pip_p

# Load the stored ColocBoost result for this gene.
cb_res <- readRDS(paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds"))

# Summarise the ColocBoost result and save the summary table.
cb_res_table <- get_cb_summary(cb_res)
saveRDS(cb_res_table, paste0(gene_name, "_colocboost_res.rds"))

# Build the ColocBoost plot; the recorded plot in cb$p is replayed below.
cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Write the plot to the per-gene plot directory. The path is derived from
# gene_name instead of a hard-coded gene, and `finally` closes the PDF device
# even when replayPlot() fails, so a failure cannot leave a device open that
# silently captures later plots.
pdf(file.path('plots', gene_name, 'sec2.colocboost_res.pdf'), width = 10, height = 5)
invisible(tryCatch(replayPlot(cb$p), finally = dev.off()))

# Display the summary table of colocalized variants.
cb_res_table

# Display the effect sign for each colocalization set.
get_effect_sign_csets(cb_res)

# Display the LD between colocalization sets.
get_between_purity_simple(
  cb_res,
  gene.name = gene_id,
  path = '/data/colocalization/QTL_data/eQTL/'
)

# AD GWAS cohorts passed to plot_cb() together with add_gwas = TRUE.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.

# Save the recorded plot in cb_ad$p to the per-gene plot directory. The path is
# derived from gene_name instead of a hard-coded gene, and `finally` closes the
# PDF device even when replayPlot() fails, so no device is left open.
pdf(file.path('plots', gene_name, 'sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
invisible(tryCatch(replayPlot(cb_ad$p), finally = dev.off()))

# Build the MASH plots for the gene under study. gene_name is passed instead of
# a hard-coded 'NECTIN2' so this cell follows the gene selected for the notebook.
mash_p <- mash_plot(gene_name = gene_name)

# Notebook display size for the plots printed below.
options(repr.plot.width = 10, repr.plot.height = 10)

# Print each element of mash_p explicitly; values are not auto-displayed inside a loop.
for (mash_p_tmp in mash_p) {
  print(mash_p_tmp)
}

# Display the TWAS results for the gene (helper defined outside this file).
plot_TWAS_res(gene_id = gene_id, gene_name = gene_name)

# Flatten the multi-gene / multi-context export for this gene. The file name is
# built from gene_name instead of a hard-coded gene symbol.
multigene_flat <- get_multigene_multicontext_flatten(
  paste0('Fungen_xQTL_allQTL.overlapped.gwas.export.', gene_name, '.rds'),
  sQTL = 'no_MSBB'
)
multigene_flat

# Split the comma-separated `sliding_windows` field into a character vector of
# window identifiers.
sliding_windows <- as.character(
  unlist(strsplit(target_gene_info$gene_info$sliding_windows, ',', fixed = TRUE))
)
sliding_windows

# Collect the multi-gene mvSuSiE results available for each sliding window and
# plot those whose fitted conditions include the target gene.
mnm_gene <- list()
for (window in sliding_windows) {
  mnm_path <- paste0('/data/analysis_result/mvsusie_multi_gene/multi_gene/ROSMAP_multi_gene.', window, '.mnm.rds')

  # Reading is best-effort: a window whose result cannot be read is skipped,
  # but the reason is reported instead of being silently discarded.
  mnm_gene_tmp <- tryCatch(
    readRDS(mnm_path),
    error = function(e) {
      message('Skipping sliding window ', window, ': could not read ', mnm_path,
              ' (', conditionMessage(e), ')')
      NULL
    }
  )
  if (is.null(mnm_gene_tmp)) {
    next
  }

  if (target_gene_info$gene_info$region_id %in% mnm_gene_tmp$mvsusie_fitted$condition_names) {
    tryCatch({
      p <- mvsusieR::mvsusie_plot(mnm_gene_tmp$mvsusie_fitted, sentinel_only = FALSE, add_cs = TRUE)
      print(p)  # This ensures the plot is displayed in JupyterLab
    }, error = function(e) {
      # A plotting failure must not abort the loop, but it should be visible.
      message('Could not plot mvSuSiE result for sliding window ', window, ': ',
              conditionMessage(e))
    })
  } else {
    message('There is mnm result for sliding window ',window,', but not include target gene ', gene_name, ' in CS')
  }

  # Keep every successfully loaded result, whether or not it was plotted.
  mnm_gene[[length(mnm_gene) + 1L]] <- mnm_gene_tmp
}

# Notebook display size for the large faceted plot below.
options(repr.plot.width = 40, repr.plot.height = 40)

# Faceted plot of estimated effects and PIPs, one row per
# (cs_coverage_0.95, study, region) combination, with the gene span drawn as an
# arrow and AD fine-mapped variants overlaid in red.
# NOTE(review): `effect_of_interest`, `flatten_table` and `tar_gene_info` are
# not assigned in the visible code. `tar_gene_info` may be a typo for
# `target_gene_info` — confirm before relying on this cell.
ggplot() +
  theme_bw() +
  facet_grid(
    cs_coverage_0.95 + study + region ~ .,
    # Break long facet labels onto several lines at "_", ":", "," and "-".
    labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)),
    # Spelled out in full; `scale =` only worked through partial argument matching.
    scales = "free_y"
  ) +
  # xlim(view_win) +
  # geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
  #           aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  # aes() with bare column names replaces the deprecated aes_string().
  geom_line(
    data = effect_of_interest,
    aes(y = fun_plot, x = x, col = cs_coverage_0.95),
    size = 2
  ) +
  geom_point(
    data = effect_of_interest,
    aes(y = pip, x = pos, col = cs_coverage_0.95),
    size = 10
  ) +
  # Single theme()/ylab() pair: the earlier theme(text = 20, strip.text.y = 25)
  # and ylab("Estimated effect") were fully overridden by these settings.
  theme(
    text = element_text(size = 40),
    strip.text.y = element_text(size = 15, angle = 0.5),
    axis.text.x = element_text(size = 40),
    axis.title.x = element_text(size = 40)
  ) +
  xlab("Position") +
  ylab("Estimated\neffect") +
  # Gene span drawn as a semi-transparent arrow at y = 1, with the gene name centred on it.
  geom_segment(
    aes(x = gene_start, xend = gene_end, y = 1, yend = 1),
    data = tar_gene_info$gene_info,
    arrow = arrow(length = unit(1, "cm")),
    size = 6,
    alpha = 0.3
  ) +
  geom_text(
    aes(x = (gene_start + gene_end) / 2, y = 1, label = gene_name),
    data = tar_gene_info$gene_info,
    size = 10
  ) +
  # AD rows with a non-zero cs_coverage_0.95; the faceting columns are dropped
  # from this layer's data — presumably so the points appear in every panel.
  geom_point(
    aes(x = pos, y = pip),
    color = "red",
    data = flatten_table %>%
      filter(str_detect(study, "AD_"), cs_coverage_0.95 != 0) %>%
      mutate(AD_study = study %>% str_replace_all("_", "\n")) %>%
      select(-study, -region, -cs_coverage_0.95)
  )

# Read the fine-mapping contexts saved in section 1 and pass them through
# get_norosmap_contexts() (helper defined outside this file).
finempping_contexts <- get_norosmap_contexts(
  readRDS(paste0(gene_name, '_finemapping_contexts.rds'))  # from sec1
)

# ColocBoost plot with the QTL contexts added as cohorts.
cb_contexts <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  cohorts = finempping_contexts,
  gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.

# Notebook display size for the interaction plot.
options(repr.plot.width = 6, repr.plot.height = 6)

# Interaction q-value per variant. The plot is stored in a variable so that
# ggsave() writes exactly this plot instead of relying on the implicit
# last_plot(). The x-axis label now matches the mapped column (variant_id) and
# the title typo ("nalysis") is corrected; the unused `size` label is dropped.
# NOTE(review): `NECTIN2_int_res` is not assigned in the visible code.
interaction_p <- ggplot(NECTIN2_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(
    title = "qvalue for NECTIN2 csets in interaction association analysis",
    x = "Variant ID",
    y = "qvalue_interaction"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(color = "grey80"),
    legend.position = NULL,
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  ylim(0, 1)

# Display in the notebook, then save.
interaction_p
ggsave('plots/NECTIN2/sec11.interaction_association_NECTIN2_lessPIP25.pdf',
       plot = interaction_p, height = 5, width = 8)

# Display `vars_p`. It is not assigned anywhere in the visible code —
# presumably created by a sourced helper or an earlier step; verify before running.
vars_p

# Display `apoe_p`; likewise not assigned in the visible code.
apoe_p

# Notebook display size for the trans fine-mapping plot.
options(repr.plot.width = 12, repr.plot.height = 6)

# Plot PIP per gene for the trans fine-mapping table when one is available;
# otherwise report that nothing was found.
# NOTE(review): `flat_var` is not assigned in the visible code.
if (!is.null(flat_var)) {
  p <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
    geom_point(alpha = 0.7) +
    labs(
      title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
      x = "Gene Name",
      y = "PIP",
      size = "PIP"
    ) +
    theme_minimal(base_size = 14) +
    theme(
      panel.background = element_blank(),
      panel.grid.major = element_line(color = "grey80"),
      legend.position = NULL,
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

  # The output directory is derived from gene_name, matching the file name,
  # instead of a hard-coded 'plots/NECTIN2/'.
  ggsave(
    file.path('plots', gene_name, paste0('sec12.trans_fine_mapping_', gene_name, '.pdf')),
    plot = p, height = 5, width = 8
  )
  p
} else {
  message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000130202	chr19	41840000	47960000	44846174	chr19:41137068-42346101,chr19:42346101-44935906,chr19:44935906-46842901,chr19:46842901-48590136	19_41137068-42346101,19_42346101-44935906,19_44935906-46842901,19_46842901-48590136	19_41137068-42346101,19_42346101_44935906,19_44935906_46842901,19_46842901_48590136	TADB_1261,TADB_1262	chr19_40837074_46645602,chr19_43631573_48886315	44846175	44889223	chr19:29228289-44527222,chr19:31719752-46645602,chr19:34641744-48886315,chr19:40837074-55473296,chr19:43631573-57160893,chr19:46290022-58617616	NECTIN2

gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000007047	chr19	45079287	45079288	MARK4	ROSMAP_AC_sQTL
ENSG00000008438	chr19	46023052	46023053	PGLYRP1	MiGA_THA_eQTL
ENSG00000010310	chr19	45668220	45668221	GIPR	MiGA_THA_eQTL,Oli_mega_eQTL
ENSG00000011478	chr19	45692402	45692403	QPCTL	MiGA_SVZ_eQTL
ENSG00000011485	chr19	46347086	46347087	PPP5C	ROSMAP_AC_sQTL
ENSG00000012061	chr19	45478827	45478828	ERCC1	ROSMAP_PCC_sQTL
ENSG00000013275	chr19	39971164	39971165	PSMC4	DLPFC_Bennett_pQTL,ROSMAP_AC_sQTL
ENSG00000024422	chr19	47713421	47713422	EHD2	MiGA_THA_eQTL
ENSG00000028277	chr19	42196584	42196585	POU2F2	ROSMAP_AC_sQTL
ENSG00000062370	chr19	44367216	44367217	ZNF112	MiGA_GFM_eQTL
ENSG00000063169	chr19	47608195	47608196	BICRA	ROSMAP_PCC_sQTL
ENSG00000063176	chr19	48619290	48619291	SPHK2	Oli_mega_eQTL
ENSG00000069399	chr19	44747835	44747836	BCL3	ROSMAP_AC_sQTL
ENSG00000073008	chr19	44643797	44643798	PVR	MiGA_SVZ_eQTL,ROSMAP_PCC_sQTL
ENSG00000073050	chr19	43580472	43580473	XRCC1	ROSMAP_AC_sQTL,ROSMAP_DLPFC_sQTL,ROSMAP_PCC_sQTL
ENSG00000077312	chr19	40750636	40750637	SNRPA	MiGA_GTS_eQTL
ENSG00000079432	chr19	42268536	42268537	CIC	ROSMAP_PCC_sQTL
ENSG00000079462	chr19	42303545	42303546	PAFAH1B3	MiGA_THA_eQTL,ROSMAP_PCC_sQTL
ENSG00000090372	chr19	46746993	46746994	STRN4	ROSMAP_DLPFC_sQTL
ENSG00000104783	chr19	43781256	43781257	KCNN4	MiGA_THA_eQTL
ENSG00000104853	chr19	44954590	44954591	CLPTM1	Knight_eQTL,MiGA_THA_eQTL,PCC_DeJager_eQTL
ENSG00000104859	chr19	45039044	45039045	CLASRP	ROSMAP_PCC_sQTL
ENSG00000104866	chr19	45091395	45091396	PPP1R37	AC_DeJager_eQTL,STARNET_eQTL
ENSG00000104879	chr19	45322874	45322875	CKM	MiGA_THA_eQTL
ENSG00000104884	chr19	45370917	45370918	ERCC2	Exc_DeJager_eQTL,PCC_DeJager_eQTL,Oli_Kellis_eQTL,Exc_mega_eQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000104888	chr19	49442359	49442360	SLC17A7	BM_36_MSBB_eQTL
ENSG00000104941	chr19	45815307	45815308	RSPH6A	DLPFC_DeJager_eQTL,PCC_DeJager_eQTL,AC_DeJager_eQTL
ENSG00000104967	chr19	45974043	45974044	NOVA2	MiGA_THA_eQTL,DLPFC_Bennett_pQTL
ENSG00000104983	chr19	45995460	45995461	CCDC61	Exc_DeJager_eQTL,Exc_Kellis_eQTL,Exc_mega_eQTL
ENSG00000105202	chr19	39846378	39846379	FBL	ROSMAP_PCC_sQTL
...	...	...	...	...	...
ENSG00000167384	chr19	44500523	44500524	ZNF180	BM_22_MSBB_eQTL
ENSG00000167555	chr19	52397848	52397849	ZNF528	MiGA_GTS_eQTL
ENSG00000167619	chr19	42313308	42313309	TMEM145	ROSMAP_PCC_sQTL
ENSG00000167635	chr19	36214601	36214602	ZNF146	Knight_eQTL
ENSG00000167637	chr19	43827320	43827321	ZNF283	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000170684	chr19	45076586	45076587	ZNF296	AC_DeJager_eQTL
ENSG00000170889	chr19	54200857	54200858	RPS9	ROSMAP_AC_sQTL
ENSG00000171051	chr19	51804109	51804110	FPR1	MiGA_SVZ_eQTL
ENSG00000176472	chr19	43525496	43525497	ZNF575	ROSMAP_PCC_sQTL
ENSG00000176531	chr19	43504934	43504935	PHLDB3	MiGA_THA_eQTL,MSBB_BM36_pQTL,DLPFC_DeJager_eQTL
ENSG00000178980	chr19	47778584	47778585	SELENOW	ROSMAP_DLPFC_sQTL
ENSG00000181027	chr19	46746045	46746046	FKRP	MiGA_SVZ_eQTL
ENSG00000187244	chr19	44809070	44809071	BCAM	DLPFC_Klein_gpQTL,ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000188624	chr19	46124687	46124688	IGFL3	MiGA_SVZ_eQTL
ENSG00000189144	chr19	37817299	37817300	ZNF573	MiGA_SVZ_eQTL
ENSG00000196235	chr19	39436155	39436156	SUPT5H	ROSMAP_AC_sQTL
ENSG00000197380	chr19	46661181	46661182	DACT3	Inh_Kellis_eQTL
ENSG00000197808	chr19	36666852	36666853	ZNF461	MiGA_GTS_eQTL
ENSG00000213889	chr19	45488776	45488777	PPM1N	MiGA_SVZ_eQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000216588	chr19	44613562	44613563	IGSF23	MiGA_THA_eQTL
ENSG00000221923	chr19	52369916	52369917	ZNF880	ROSMAP_PCC_sQTL
ENSG00000224916	chr19	44942237	44942238	APOC4-APOC2	STARNET_eQTL
ENSG00000226763	chr19	43596616	43596617	SRRM5	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL
ENSG00000234906	chr19	44946034	44946035	APOC2	MiGA_SVZ_eQTL,STARNET_eQTL
ENSG00000268434	chr19	45785972	45785973	AC011530.1	ROSMAP_AC_sQTL
ENSG00000268500	chr19	51646888	51646889	AC018755.2	STARNET_eQTL
ENSG00000272333	chr19	35717972	35717973	KMT2B	ROSMAP_DLPFC_sQTL
ENSG00000273777	chr19	44529787	44529788	CEACAM20	MiGA_THA_eQTL
ENSG00000275395	chr19	39934625	39934626	FCGBP	MiGA_THA_eQTL
ENSG00000285505	chr19	41994231	41994232	AC010616.1	ROSMAP_PCC_sQTL,ROSMAP_DLPFC_sQTL

Case study: NECTIN2 xQTL and AD GWAS

Overview

Computing environment setup

How to Use This Notebook

Section 0: Sanity check

Check the basic information of the gene

Check the existing results which are inputs to this analysis

Section 1: Fine-mapping for xQTL and GWAS

Section 2: Multi-context colocalization with Bellenguez 2022

Section 3: Refinement of colocalized loci with other AD GWAS

Section 4: Assessment of multi-context xQTL effect sizes

Option 1: ColocBoost + MASH

Option 2: mvSuSiE

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)

TWAS results

MR results

cTWAS results

Section 6: Context specific multi-gene fine-mapping

A quick analysis: using the xQTL-AD summary table (flatten table)

A statistically solid approach: mvSuSiE multi-gene analysis

Section 7: Epigenomic QTL and their target regions

Generate a crude plot to determined whether the story is interesting

Section 8: Context focused validation in other xQTL data

Section 9: Non-linear effects of xQTL

APOE interaction

Section 10: in silico functional studies in iPSC model

Section 11: Functional annotations of selected loci

Section 12: Candidate loci as trans-xQTL

Creative thinking: generate hypothesis, search in literature, raise questions to discuss