Results from the univariate regressions performed using can be combined in a post-processing step to perform multivariate hypothesis testing. In this example, we fit on transcript-level counts and then perform multivariate hypothesis testing by combining transcripts at the gene-level. This is done with the function.

Import transcript-level counts

Read in transcript counts from the package.

library(readr)
library(tximport)
library(tximportData)

# specify directory
path = system.file("extdata", package="tximportData")

# read sample meta-data
samples = read.table(file.path(path,"samples.txt"), header=TRUE)
samples.ext = read.table(file.path(path,"samples_extended.txt"), header=TRUE, sep="\t")

# read assignment of transcripts to genes
# remove genes on the PAR, since these are present twice
tx2gene = read_csv(file.path(path, "tx2gene.gencode.v27.csv"))
tx2gene = tx2gene[grep("PAR_Y", tx2gene$GENEID, invert=TRUE),]

# read transcript-level quatifictions
files = file.path(path, "salmon", samples$run, "quant.sf.gz")
txi = tximport(files, type = "salmon", txOut=TRUE)

# Create metadata simulating two conditions
sampleTable = data.frame(condition = factor(rep(c("A", "B"), each = 3)))
rownames(sampleTable) = paste0("Sample", 1:6)

Standard dream analysis

Perform standard analysis at the transcript-level

library(variancePartition)
library(edgeR)

# Prepare transcript-level reads
dge = DGEList(txi$counts)
design <- model.matrix(~condition, data = sampleTable)
isexpr = filterByExpr(dge, design)
dge = dge[isexpr,]
dge = calcNormFactors(dge) 

# Estimate precision weights
vobj = voomWithDreamWeights(dge, ~ condition, sampleTable)

# Fit regression model one transcript at a time
fit = dream(vobj, ~ condition, sampleTable)
fit = eBayes(fit)

Multivariate analysis

Combine the transcript-level results at the gene-level. The mapping between transcript and gene is stored in as a list.

# Prepare transcript to gene mapping
# keep only transcripts present in vobj
# then convert to list with key GENEID and values TXNAMEs
keep = tx2gene$TXNAME %in% rownames(vobj)
tx2gene.lst = unstack(tx2gene[keep,])

# Run multivariate test on entries in each feature set
res = mvTest(fit, vobj, tx2gene.lst, coef="conditionB")

# truncate gene names since they have version numbers
# ENST00000498289.5 -> ENST00000498289
res$ID.short = gsub("\\..+", "", res$ID)

Gene set analysis

Perform gene set analysis using on the gene-level test statistics.

# must have zenith > v1.0.2
library(zenith)
library(GSEABase)

gs = get_MSigDB("C1", to="ENSEMBL")

df_gsa = zenithPR_gsa( res$stat, res$ID.short, gs, inter.gene.cor=.05)

head(df_gsa)
##                 NGenes Correlation      delta       se       p.less p.greater       PValue
## M652_chr5q35        81        0.05 -9558.4102 1689.015 7.737973e-09 1.0000000 1.547595e-08
## M12945_chr12q21     35        0.05 -1506.2074 1897.386 2.136523e-01 0.7863477 4.273046e-01
## M5824_chr11p13      30        0.05  -988.0642 1952.248 3.063911e-01 0.6936089 6.127822e-01
## M1556_chr20q11      92        0.05   775.2733 1678.101 6.779541e-01 0.3220459 6.440917e-01
## M13160_chr16p11    105        0.05  -519.6880 1660.274 3.771373e-01 0.6228627 7.542747e-01
## M6742_chr2q36       20        0.05  -575.4590 2133.016 3.936640e-01 0.6063360 7.873280e-01
##                 Direction          FDR
## M652_chr5q35         Down 3.838035e-06
## M12945_chr12q21      Down 9.970286e-01
## M5824_chr11p13       Down 9.970286e-01
## M1556_chr20q11         Up 9.970286e-01
## M13160_chr16p11      Down 9.970286e-01
## M6742_chr2q36        Down 9.970286e-01

Session info

## R version 4.3.0 RC (2023-04-13 r84269)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.17-bioc/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_GB             
##  [4] LC_COLLATE=C               LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
## [10] LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/New_York
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] org.Hs.eg.db_3.17.0      msigdbr_7.5.1            GSEABase_1.62.0         
##  [4] graph_1.78.0             annotate_1.78.0          XML_3.99-0.14           
##  [7] AnnotationDbi_1.62.0     IRanges_2.34.0           S4Vectors_0.38.0        
## [10] Biobase_2.60.0           BiocGenerics_0.46.0      zenith_1.2.0            
## [13] tximportData_1.27.0      tximport_1.28.0          readr_2.1.4             
## [16] edgeR_3.42.0             pander_0.6.5             variancePartition_1.30.0
## [19] BiocParallel_1.34.0      limma_3.56.0             ggplot2_3.4.2           
## [22] knitr_1.42              
## 
## loaded via a namespace (and not attached):
##   [1] jsonlite_1.8.4              magrittr_2.0.3              farver_2.1.1               
##   [4] nloptr_2.0.3                rmarkdown_2.21              zlibbioc_1.46.0            
##   [7] vctrs_0.6.2                 memoise_2.0.1               minqa_1.2.5                
##  [10] RCurl_1.98-1.12             htmltools_0.5.5             progress_1.2.2             
##  [13] curl_5.0.0                  broom_1.0.4                 sass_0.4.5                 
##  [16] KernSmooth_2.23-20          bslib_0.4.2                 pbkrtest_0.5.2             
##  [19] plyr_1.8.8                  cachem_1.0.7                lifecycle_1.0.3            
##  [22] iterators_1.0.14            pkgconfig_2.0.3             Matrix_1.5-4               
##  [25] R6_2.5.1                    fastmap_1.1.1               MatrixGenerics_1.12.0      
##  [28] GenomeInfoDbData_1.2.10     rbibutils_2.2.13            digest_0.6.31              
##  [31] numDeriv_2016.8-1.1         colorspace_2.1-0            GenomicRanges_1.52.0       
##  [34] RSQLite_2.3.1               filelock_1.0.2              labeling_0.4.2             
##  [37] RcppZiggurat_0.1.6          clusterGeneration_1.3.7     fansi_1.0.4                
##  [40] httr_1.4.5                  compiler_4.3.0              bit64_4.0.5                
##  [43] aod_1.3.2                   withr_2.5.0                 doParallel_1.0.17          
##  [46] backports_1.4.1             DBI_1.1.3                   highr_0.10                 
##  [49] gplots_3.1.3                MASS_7.3-59                 DelayedArray_0.26.0        
##  [52] gtools_3.9.4                caTools_1.18.2              tools_4.3.0                
##  [55] remaCor_0.0.11              glue_1.6.2                  nlme_3.1-162               
##  [58] grid_4.3.0                  reshape2_1.4.4              generics_0.1.3             
##  [61] snow_0.4-4                  gtable_0.3.3                tzdb_0.3.0                 
##  [64] tidyr_1.3.0                 hms_1.1.3                   utf8_1.2.3                 
##  [67] XVector_0.40.0              foreach_1.5.2               pillar_1.9.0               
##  [70] stringr_1.5.0               babelgene_22.9              vroom_1.6.1                
##  [73] splines_4.3.0               dplyr_1.1.2                 BiocFileCache_2.8.0        
##  [76] lattice_0.21-8              bit_4.0.5                   tidyselect_1.2.0           
##  [79] locfit_1.5-9.7              Biostrings_2.68.0           SummarizedExperiment_1.30.0
##  [82] RhpcBLASctl_0.23-42         xfun_0.39                   statmod_1.5.0              
##  [85] matrixStats_0.63.0          KEGGgraph_1.60.0            stringi_1.7.12             
##  [88] yaml_2.3.7                  boot_1.3-28.1               evaluate_0.20              
##  [91] codetools_0.2-19            archive_1.1.5               tibble_3.2.1               
##  [94] Rgraphviz_2.44.0            cli_3.6.1                   xtable_1.8-4               
##  [97] Rdpack_2.4                  munsell_0.5.0               jquerylib_0.1.4            
## [100] Rcpp_1.0.10                 GenomeInfoDb_1.36.0         dbplyr_2.3.2               
## [103] png_0.1-8                   Rfast_2.0.7                 RUnit_0.4.32               
## [106] parallel_4.3.0              blob_1.2.4                  prettyunits_1.1.1          
## [109] bitops_1.0-7                lme4_1.1-33                 mvtnorm_1.1-3              
## [112] lmerTest_3.1-3              scales_1.2.1                purrr_1.0.1                
## [115] crayon_1.5.2                rlang_1.1.0                 EnrichmentBrowser_2.30.0   
## [118] KEGGREST_1.40.0

<>

References