micromamba install -n r_libs r-pecotmr

# If an error occurs while sourcing these scripts, it might be because a get()
# call returned NULL. Restart the kernel (or click the R kernel indicator in the
# upper-right corner) and run this cell again.
source('../../codes/cb_plot.R')
source('../../codes/utilis.R')

# Source every ColocBoost R script. The dot is escaped and the pattern anchored
# so that only file names ending in ".R" are picked up; an unanchored ".R" regex
# matches any name containing an arbitrary character followed by "R"
# (for example "notes.Rmd").
colocboost_scripts <- list.files(
  "/data/colocalization/colocboost/R",
  pattern = "\\.R$",
  full.names = TRUE
)
for (file in colocboost_scripts) {
  source(file)
}
# Gene analysed in this notebook.
gene_name <- 'MS4A4E'

# Folder that receives this gene's figures. Only create it when it is missing so
# that re-running the cell does not warn about an existing directory.
plot_dir <- file.path('plots', gene_name)
if (!dir.exists(plot_dir)) {
  dir.create(plot_dir, recursive = TRUE)
}

# Look up the gene record and display it.
target_gene_info <- get_gene_info(gene_name = gene_name)
target_gene_info

# Region identifier and chromosome taken from the gene record.
gene_id <- target_gene_info$gene_info$region_id
chrom <- target_gene_info$gene_info$`#chr`

# NOTE(review): utilis.R is also sourced at the top of this file; presumably it
# is re-sourced here to pick up edits made during the session. Confirm or drop.
source('../../codes/utilis.R')
# Run the ROSMAP bulk expression helper for the target gene.
expression_in_rosmap_bulk(target_gene_info)

# Display `region_p`. It is not assigned anywhere in this section; presumably it
# is created by an earlier step or by a sourced script. TODO confirm.
region_p

# Display `pip_p` (same caveat as `region_p`).
pip_p

# Load the ColocBoost result for the target gene (a single read is enough).
cb_res <- readRDS(paste0("/data/analysis_result/ColocBoost/2024_9/", gene_id, "_res.rds"))

# Draw the ColocBoost plot; the returned object carries the plot in `$p`,
# which is replayed into the PDF below.
cb <- plot_cb(cb_res = cb_res, cex.pheno = 1.5, x.phen = -0.2)

# Write the plot into the gene's plot folder (built from `gene_name` rather
# than a hard-coded gene, so it matches the folder created at the top).
pdf(file.path('plots', gene_name, 'sec2.colocboost_res.pdf'), width = 10, height = 5)
replayPlot(cb$p)
dev.off()

# colocalized variants
# NOTE(review): `cb_res_table` is not assigned in this section; presumably it is
# produced by plot_cb() or a sourced helper. TODO confirm.
cb_res_table

# effect sign for each coloc sets
get_effect_sign_csets(cb_res)

# LD between coloc sets
get_between_purity_simple(cb_res, gene.name = gene_id, path = '/data/colocalization/QTL_data/eQTL/')

# AD GWAS cohorts passed to plot_cb() through `cohorts`.
AD_cohorts <- c(
  'AD_Jansen_2021',
  'AD_Bellenguez_EADB_2022',
  'AD_Bellenguez_EADI_2022',
  'AD_Kunkle_Stage1_2019',
  'AD_Wightman_Excluding23andMe_2021',
  'AD_Wightman_ExcludingUKBand23andME_2021',
  'AD_Wightman_Full_2021'
)

# Same ColocBoost plot call as before, now with `add_gwas = TRUE` and the
# cohort list above.
cb_ad <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_gwas = TRUE,
  gene_id = gene_id,
  cohorts = AD_cohorts
)

No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.No pvalue cutoff. Extract all variants names.

# Save the all-AD-cohort ColocBoost plot into the gene's plot folder. The folder
# is built from `gene_name` so it stays in step with the directory created at
# the top of the notebook.
pdf(file.path('plots', gene_name, 'sec3.colocboost_res_allad.pdf'), width = 10, height = 5)
replayPlot(cb_ad$p)
dev.off()

# Build the MASH plots for the gene under study (taken from `gene_name` instead
# of repeating the gene as a literal).
mash_p <- mash_plot(gene_name = gene_name)

# Square 10 x 10 plot area for the notebook output.
options(repr.plot.width = 10, repr.plot.height = 10)

# Print each element of `mash_p` in turn.
for (mash_p_tmp in mash_p) {
  print(mash_p_tmp)
}

# Load one multi-context result file and plot it when a fit is present.
#
# rds_path   : path of the `.multicontext_bvsr.rds` file to read.
# cohort     : cohort label inserted into the status messages.
# out_prefix : output prefix handed to plot_and_save().
#
# Returns (invisibly) the object read from `rds_path`, or NULL when the file
# could not be read. A read failure is reported with a message and then treated
# like an empty result, so the notebook keeps running.
load_and_plot_multi_context <- function(rds_path, cohort, out_prefix) {
  res <- tryCatch(
    readRDS(rds_path),
    error = function(e) {
      message('Error in loading ', cohort, ' multi context data')
      NULL
    }
  )
  # Guard against NULL / empty objects before indexing the first element.
  has_fit <- length(res) > 0 && !is.null(res[[1]]$mvsusie_fitted)
  if (has_fit) {
    plot_and_save(res[[1]], out_prefix)
  } else {
    message('Multi Context results are empty in ', cohort, ' data')
  }
  invisible(res)
}

# Load and process ROSMAP data
message("Multi context in ROSMAP data")
multi_context_rosmap_tmp <- load_and_plot_multi_context(
  rds_path = paste0('/data/analysis_result/multi_context/ROSMAP/mnm/ROSMAP_DeJager.',
                    chrom, '_', gene_id, '.multicontext_bvsr.rds'),
  cohort = 'ROSMAP',
  out_prefix = file.path('plots', gene_name, 'sec4.multi_context_ROSMAP')
)

# Load and process MSBB data
message("Multi context in MSBB data")
multi_context_msbb_tmp <- load_and_plot_multi_context(
  rds_path = paste0('/data/analysis_result/multi_context/MSBB/mnm/MSBB_eQTL.',
                    chrom, '_', gene_id, '.multicontext_bvsr.rds'),
  cohort = 'MSBB',
  out_prefix = file.path('plots', gene_name, 'sec4.multi_context_MSBB')
)

# TWAS results for the target gene.
plot_TWAS_res(gene_id = gene_id, gene_name = gene_name)

# Flattened multi-gene / multi-context table for this gene. The export file name
# is assembled from `gene_name` instead of repeating the gene as a literal.
multigene_flat <- get_multigene_multicontext_flatten(
  paste0('Fungen_xQTL_allQTL.overlapped.gwas.export.', gene_name, '.rds'),
  sQTL = 'no_MSBB'
)
multigene_flat

# Split the comma-separated `TADB_id` field into one window ID per element.
# NOTE(review): the variable is called `sliding_windows` but is filled from the
# `TADB_id` field; confirm that this is the intended field.
sliding_windows <- target_gene_info$gene_info$TADB_id %>%
  strsplit(',') %>%
  unlist() %>%
  as.character()
sliding_windows

# Incomplete expression kept only as a comment: the trailing `%>%` made R treat
# the next statement (the `gene_ref` assignment) as the pipe's right-hand side,
# and `mnm_gene_tmp` is not assigned until the multi-gene loop further down.
# mnm_gene_tmp[[1]] %>%

# Gene reference table (GRCh38, Ensembl 103 per the file name).
gene_ref <- fread('/data/resource/References/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.region_list')

# Rows whose gene name contains "MS4A". Inside filter(), `gene_name` is expected
# to resolve to the `gene_name` column of `gene_ref` rather than the global
# variable of the same name (assumes the table has that column; TODO confirm).
ms4a_ref <- gene_ref %>% filter(str_detect(gene_name, 'MS4A'))

# Gene IDs of those rows, used to screen the multi-gene results.
ms4a_genes <- ms4a_ref %>% pull(gene_id)

# Display the matching reference rows.
ms4a_ref

# Main loop to process sliding windows
# For every window, read each multi-gene result file whose name contains the
# window ID, plot it when any MS4A gene is among the fitted condition names, and
# collect every successfully loaded result in `mnm_gene`.
mnm_gene <- list()
for (window in sliding_windows) {
    context_files <- list.files('/data/analysis_result/multi_gene/ROSMAP/mnm_genes/',
                                pattern = window, full.names = TRUE)
    # Match the suffix literally; as a regex the unescaped dots match any character.
    # NOTE(review): the suffix is spelled 'bvrs' here but 'bvsr' in the
    # multi-context file names used earlier. Confirm against the files on disk.
    context_files <- context_files[str_detect(context_files, fixed('.multigene_bvrs.rds'))]
    for (context_file in context_files) {
        # Context label = file name up to the first dot.
        context_mnm <- str_split(basename(context_file), '[.]', simplify = TRUE)[, 1]
        # Load multi-gene data; report unreadable files instead of dropping them silently.
        mnm_gene_tmp <- tryCatch(
            readRDS(context_file),
            error = function(e) {
                message('Could not read ', context_file, ': ', conditionMessage(e))
                NULL
            }
        )

        if (is.null(mnm_gene_tmp)) {
            next
        }

        # Check if any MS4A gene is in the condition names
        if (any(ms4a_genes %in% mnm_gene_tmp[[1]]$mvsusie_fitted$condition_names)) {
            # Use a common prefix format for multi-gene plots
            plot_and_save(mnm_gene_tmp[[1]], 'plots/MS4A4E/sec6.multigene')
        } else {
            message('There is mnm result for TAD window ', window, ' in ', context_mnm,
                    ', but it does not include MS4A family in CS.')
        }
        # Append to the results list
        mnm_gene[[length(mnm_gene) + 1]] <- mnm_gene_tmp
    }
}

# Use a 40 x 40 plot area for the multi-panel figure below.
options(repr.plot.width = 40, repr.plot.height = 40)

# Multi-panel effect plot: one facet row per cs_coverage_0.95 / study / region
# combination, with "_", ":", "," and "-" in the strip labels replaced by line
# breaks. Layers, in order:
#   * lines of `fun_plot` against `x` and points of `pip` against `pos`, both
#     from `effect_of_interest`, coloured by `cs_coverage_0.95`;
#   * an arrow from `gene_start` to `gene_end` at y = 1 and a gene-name label
#     at its midpoint, both from `tar_gene_info$gene_info`;
#   * red points of `pip` against `pos` for `flatten_table` rows whose study
#     contains "AD_" and whose cs_coverage_0.95 is not 0.
# theme() and ylab() each appear twice in the chain; presumably the later
# calls take precedence where they overlap.
# NOTE(review): `effect_of_interest`, `flatten_table` and `tar_gene_info` are
# not assigned in this section. The gene record used elsewhere in this file is
# `target_gene_info`. Confirm that `tar_gene_info` is not a typo.
 ggplot() + theme_bw() + facet_grid(cs_coverage_0.95 + study + region ~ ., labeller = labeller(.rows = function(x) gsub("([_:,-])", "\n", x)), scale = "free_y") +

      theme(text = element_text(size = 20), strip.text.y = element_text(size = 25, angle = 0.5)) +
     # xlim(view_win) +
      ylab("Estimated effect") +
   #   geom_line(data = haQTL_df %>% mutate(study = "haQTL effect") %>% filter(CS == 5),
    #            aes_string(y = "fun_plot", x = "x", col = "CS"), size = 4, col = "#00AEEF") +
  geom_line(data = effect_of_interest ,
                aes_string(y = "fun_plot", x = "x", col = "cs_coverage_0.95"), size = 2) +  
    geom_point(data = effect_of_interest ,
                aes_string(y = "pip", x = "pos", col = "cs_coverage_0.95"), size = 10) +
    theme(text = element_text(size = 40), strip.text.y = element_text(size = 15, angle = 0.5), 
            axis.text.x = element_text(size = 40), axis.title.x = element_text(size = 40)) +
      xlab("Position") +
      ylab("Estimated\neffect") +
      geom_segment(arrow = arrow(length = unit(1, "cm")), aes(x = gene_start, xend = gene_end, y = 1, yend = 1), size = 6,
                  data = tar_gene_info$gene_info, alpha = 0.3) +
      geom_text(aes(x = (gene_start + gene_end) / 2, y = 1 , label = gene_name), size = 10, 
              data = tar_gene_info$gene_info)+
        # This layer's data drops the three faceting columns (study, region,
        # cs_coverage_0.95); presumably so the AD points appear in every panel.
        geom_point(aes(x = pos, y = pip  ) ,color = "red", data = flatten_table%>%filter( str_detect(study,"AD_") , cs_coverage_0.95 != 0  )%>%mutate(AD_study = study%>%str_replace_all("_","\n" ))%>%select(-study,-region,-cs_coverage_0.95) )

# Read the fine-mapping contexts saved for this gene (a single read is enough),
# then pass them through get_norosmap_contexts().
finempping_contexts <- readRDS(paste0(gene_name, '_finemapping_contexts.rds')) # from sec1
finempping_contexts <- get_norosmap_contexts(finempping_contexts)

# ColocBoost plot with `add_QTL = TRUE`, using the contexts above as `cohorts`.
cb_contexts <- plot_cb(
  cb_res = cb_res,
  cex.pheno = 1.5,
  x.phen = -0.2,
  add_QTL = TRUE,
  cohorts = finempping_contexts,
  gene_id = gene_id
)

No pvalue cutoff. Extract all variants names.

options(repr.plot.width = 6, repr.plot.height = 6)

# Interaction q-value per variant, with the y axis fixed to [0, 1].
# The x axis shows `variant_id`, so it is labelled accordingly, and only the
# aesthetics that are actually mapped (x, y) receive labels.
# NOTE(review): `MS4A4E_int_res` is not assigned in this section. TODO confirm
# where it is created.
interaction_p <- ggplot(MS4A4E_int_res, aes(x = variant_id, y = qvalue_interaction)) +
  geom_point(alpha = 0.7, size = 6) +
  labs(title = "qvalue for MS4A4E csets in interaction association analysis",
       x = "Variant ID",
       y = "qvalue_interaction") +
  theme_minimal(base_size = 14) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey80"),
        legend.position = NULL,
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 1)
  # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

# Show the plot, then save exactly this object instead of relying on whatever
# plot happens to be the most recent one.
interaction_p
ggsave('plots/MS4A4E/sec11.interaction_association_MS4A4E_lessPIP25.pdf',
       plot = interaction_p, height = 5, width = 8)

# Display `vars_p`. It is not assigned anywhere in this section; presumably it
# is created by an earlier step or by a sourced script. TODO confirm.
vars_p

# Display `apoe_p` (same caveat as `vars_p`).
apoe_p

options(repr.plot.width = 12, repr.plot.height = 6)

# PIP per gene for the trans fine-mapped signals, with point size scaled by PIP.
# The plot is drawn and saved only when `flat_var` holds at least one row;
# otherwise a message reports that there is nothing to show.
# NOTE(review): `flat_var` is not assigned in this section. TODO confirm where
# it is created.
if (!is.null(flat_var) && NROW(flat_var) > 0) {
  p <- ggplot(flat_var, aes(x = gene_name, y = pip, size = pip)) +
    geom_point(alpha = 0.7) +
    labs(title = paste0("PIP values for trans fine mapped Genes in ", gene_name, " csets with AD"),
         x = "Gene Name",
         y = "PIP",
         size = "PIP") +
    theme_minimal(base_size = 14) +
    theme(panel.background = element_blank(),
          panel.grid.major = element_line(color = "grey80"),
          legend.position = NULL,
          axis.text.x = element_text(angle = 45, hjust = 1))
    # scale_color_manual(values = colorRampPalette(brewer.pal(8, "Set1"))(length(unique(flat_var$gene_name))))

  # Folder and file name are both derived from `gene_name`, so the figure lands
  # in the folder of the gene it describes.
  ggsave(file.path('plots', gene_name, paste0('sec12.trans_fine_mapping_', gene_name, '.pdf')),
         plot = p, height = 5, width = 8)
  p
} else {
  message('There are no detectable trans signals for ', gene_name)
}

region_id	#chr	start	end	TSS	LD_matrix_id	LD_sumstats_id	LD_sumstats_id_old	TADB_index	TADB_id	gene_start	gene_end	sliding_windows	gene_name
<chr>	<chr>	<dbl>	<dbl>	<int>	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000214787	chr11	57280000	61440000	60243136	chr11:56858541-60339997,chr11:60339997-63818332	11_56858541-60339997,11_60339997-63818332	11_56858541-60339997,11_60339997_63818332	TADB_903,TADB_904	chr11_56299638_62485822,chr11_60203514_65321811	60243137	60200270	chr11:44804071-58324952,chr11:46980557-62485822,chr11:50840000-65321811,chr11:56299638-66671716,chr11:60203514-68955802	MS4A4E

colocalized phenotypes	purity	# variants	highest VCP	colocalized index	colocalized variants	max_abs_z_variant	cset_id
<chr>	<dbl>	<dbl>	<dbl>	<chr>	<chr>	<chr>	<chr>
DLPFC; AD_Bellenguez_2022	0.825047	24	0.1558792	11569; 11568; 11573; 11574; 11562; 11551; 11581; 11540; 11518; 11900; 11914; 11589; 11632; 11625; 11640; 11641; 11633; 11624; 11615; 11613; 11606; 11659; 11661; 11665	chr11:60175636:A:T; chr11:60175342:A:G; chr11:60177107:C:T; chr11:60177337:T:C; chr11:60173126:T:A; chr11:60169453:A:C; chr11:60178272:T:C; chr11:60165106:G:A; chr11:60161199:G:C; chr11:60251788:G:GTA; chr11:60254475:G:A; chr11:60180901:C:G; chr11:60193954:G:A; chr11:60192496:G:A; chr11:60194693:G:A; chr11:60194716:G:A; chr11:60194013:G:T; chr11:60192370:T:C; chr11:60190040:C:T; chr11:60189465:C:T; chr11:60187884:G:A; chr11:60198316:G:A; chr11:60198822:T:C; chr11:60200053:A:G	chr11:60175342:A:G	coloc_sets:Y2_Y6:CS1

	variants	DLPFC	AD_Bellenguez_2022
	<chr>	<dbl>	<dbl>
chr11:60175636:A:T	chr11:60175636:A:T	-4.374553	-9.530120
chr11:60175342:A:G	chr11:60175342:A:G	-4.387689	-9.457831
chr11:60177107:C:T	chr11:60177107:C:T	-4.367298	-9.469880
chr11:60177337:T:C	chr11:60177337:T:C	-4.374553	-9.445783
chr11:60173126:T:A	chr11:60173126:T:A	-4.285175	-9.882353
chr11:60169453:A:C	chr11:60169453:A:C	-4.329628	-9.518072
chr11:60178272:T:C	chr11:60178272:T:C	-4.374553	-9.361446
chr11:60165106:G:A	chr11:60165106:G:A	-4.264497	-9.506024
chr11:60161199:G:C	chr11:60161199:G:C	-4.171721	-9.554217
chr11:60251788:G:GTA	chr11:60251788:G:GTA	-3.725960	-10.283989
chr11:60254475:G:A	chr11:60254475:G:A	-3.703405	-10.238095
chr11:60180901:C:G	chr11:60180901:C:G	-4.236167	-9.325301
chr11:60193954:G:A	chr11:60193954:G:A	-4.243795	-9.228916
chr11:60192496:G:A	chr11:60192496:G:A	-4.221645	-9.253012
chr11:60194693:G:A	chr11:60194693:G:A	-4.255751	-9.204819
chr11:60194716:G:A	chr11:60194716:G:A	-4.224732	-9.192771
chr11:60194013:G:T	chr11:60194013:G:T	-4.224514	-9.180723
chr11:60192370:T:C	chr11:60192370:T:C	-4.108100	-9.253012
chr11:60190040:C:T	chr11:60190040:C:T	-4.120288	-9.228916
chr11:60189465:C:T	chr11:60189465:C:T	-4.120288	-9.228916
chr11:60187884:G:A	chr11:60187884:G:A	-4.120288	-9.228916
chr11:60198316:G:A	chr11:60198316:G:A	-4.092060	-9.228916
chr11:60198822:T:C	chr11:60198822:T:C	-4.086291	-9.216867
chr11:60200053:A:G	chr11:60200053:A:G	-4.092060	-9.204819

gene_id	#chr	start	end	gene_name	contexts
<chr>	<chr>	<int>	<int>	<chr>	<chr>
ENSG00000006118	chr11	60924459	60924460	TMEM132A	AC_DeJager_eQTL,STARNET_eQTL
ENSG00000011347	chr11	61581147	61581148	SYT7	MiGA_SVZ_eQTL
ENSG00000013725	chr11	60971679	60971680	CD6	BM_22_MSBB_eQTL
ENSG00000071203	chr11	60492777	60492778	MS4A12	MiGA_GTS_eQTL
ENSG00000110079	chr11	60185656	60185657	MS4A4A	ROSMAP_AC_sQTL,ROSMAP_PCC_sQTL
ENSG00000110446	chr11	60952652	60952653	SLC15A3	MiGA_SVZ_eQTL
ENSG00000134809	chr11	57530802	57530803	TIMM10	BM_10_MSBB_eQTL
ENSG00000149124	chr11	58731973	58731974	GLYAT	MiGA_GTS_eQTL
ENSG00000149131	chr11	57597386	57597387	SERPING1	MiGA_SVZ_eQTL
ENSG00000149150	chr11	57515779	57515780	SLC43A1	ROSMAP_DLPFC_sQTL
ENSG00000149503	chr11	62123997	62123998	INCENP	MiGA_THA_eQTL
ENSG00000149506	chr11	60867561	60867562	ZP1	MiGA_THA_eQTL
ENSG00000156603	chr11	57712322	57712323	MED19	MiGA_GFM_eQTL

#chr	start	end	gene_id	gene_name
<chr>	<int>	<int>	<chr>	<chr>
chr11	60056586	60056587	ENSG00000149516	MS4A3
chr11	60088260	60088261	ENSG00000149534	MS4A2
chr11	60184665	60184666	ENSG00000110077	MS4A6A
chr11	60185656	60185657	ENSG00000110079	MS4A4A
chr11	60243136	60243137	ENSG00000214787	MS4A4E
chr11	60334830	60334831	ENSG00000166926	MS4A6E
chr11	60378484	60378485	ENSG00000166927	MS4A7
chr11	60378529	60378530	ENSG00000166928	MS4A14
chr11	60429571	60429572	ENSG00000166930	MS4A5
chr11	60455845	60455846	ENSG00000156738	MS4A1
chr11	60492777	60492778	ENSG00000071203	MS4A12
chr11	60515391	60515392	ENSG00000204979	MS4A13
chr11	60577855	60577856	ENSG00000283601	MS4A19P
chr11	60699611	60699612	ENSG00000166959	MS4A8
chr11	60729303	60729304	ENSG00000214782	MS4A18
chr11	60756866	60756867	ENSG00000166961	MS4A15
chr11	60785332	60785333	ENSG00000172689	MS4A10

Case study: MS4A4E xQTL and AD GWAS¶

Overview¶

Computing environment setup¶

How to Use This Notebook¶

Section 0: Sanity check ¶

Check the basic information of the gene¶

Check the existing results which are inputs to this analysis¶

Check the existing results which are inputs to this analysis¶

Section 1: Fine-mapping for xQTL and GWAS ¶

Section 2: Multi-context colocalization with Bellenguez 2022 ¶

Section 3: Refinement of colocalized loci with other AD GWAS ¶

Section 4: Assessment of multi-context xQTL effect sizes ¶

Option 1: ColocBoost + MASH¶

Option 2: mvSuSiE¶

Section 5: Multi-context causal TWAS (including conventional TWAS and MR)¶

TWAS results¶

MR results¶

cTWAS results¶

Section 6: Context specific multi-gene fine-mapping ¶

A quick analysis: using the xQTL-AD summary table (flatten table)¶

A statistically solid approach: mvSuSiE multi-gene analysis¶

Section 7: Epigenomic QTL and their target regions ¶

Generate a crude plot to determine whether the story is interesting¶

Section 8: Context focused validation in other xQTL data ¶

Section 9: Non-linear effects of xQTL ¶

APOE interaction¶

Section 10: in silico functional studies in iPSC model ¶

Section 11: Functional annotations of selected loci ¶

Section 12: Candidate loci as trans-xQTL ¶

Creative thinking: generate hypothesis, search in literature, raise questions to discuss¶