## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"------------------
BiocStyle::latex()

## ----echo=TRUE,  eval=FALSE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)----
## source("http://www.bioconductor.org/biocLite.R")
## biocLite(c("PADOG", "GSVA", "AnnotationDbi", "topGO", "pathview", "gage",
## "globaltest", "limma", "edgeR", "safe", "org.Hs.eg.db", "org.Mm.eg.db",
## "org.Rn.eg.db"))

## ----echo=TRUE,  eval=FALSE,tidy=TRUE------------------------------------
## install.packages("devtools")

## ----echo=TRUE,  eval=FALSE,tidy=TRUE------------------------------------
## library(devtools)
## install_bitbucket("malhamdoosh/egseadata", ref="Stable_Release")

## ----echo=TRUE,  eval=FALSE,tidy=TRUE------------------------------------
## source("http://bioconductor.org/biocLite.R")
## biocLite("EGSEA")

## ----echo=TRUE,  eval=FALSE,tidy=TRUE------------------------------------
## library(devtools)
## install_bitbucket("malhamdoosh/egsea", ref="Stable_Release")

## ----echo=TRUE,  eval=TRUE,tidy=TRUE-------------------------------------
library(EGSEA)

## ----echo=TRUE,  eval=TRUE,tidy=TRUE-------------------------------------
library(EGSEAdata)
data(il13.data)
v = il13.data$voom
names(v)
v$design
contrasts = il13.data$contra
contrasts

## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)----
# prepare gene set collections
gs.annots = buildIdxEZID(entrezIDs=rownames(v$E), species="human", msigdb.gsets="c5", 
 			kegg.exclude = c("Metabolism"))
names(gs.annots)

## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)----
# perform the EGSEA analysis
# set display.top = 20 to display more gene sets. It takes longer time to run.
gsa = egsea(voom.results=v, contrasts=contrasts,  gs.annots=gs.annots, 
 			symbolsMap=v$genes, baseGSEAs=egsea.base()[-2], display.top = 3,
 			 sort.by="avg.rank", egsea.dir="./il13-egsea-report", num.threads = 4)
topSets(gsa, contrast=1, gs.label="kegg", number = 10)
topSets(gsa, contrast=1, gs.label="kegg", sort.by="ora", number = 10, names.only=FALSE)
topSets(gsa, contrast="comparison", gs.label="kegg", number = 10)

## ----echo=TRUE, eval=TRUE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)----
gs.annots = buildIdxEZID(entrezIDs=rownames(v$E), species="human")
gsetdb.annots = buildGeneSetDBIdxEZID(entrezIDs=rownames(v$E), species="human")
gs.annots = c(gs.annots, gsetdb.annots)
names(gs.annots)

## ----echo=TRUE,  eval=FALSE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)----
## # load the mammary dataset
## library(EGSEA)
## library(EGSEAdata)
## data(mam.data)
## v = mam.data$voom
## names(v)
## v$design
## contrasts = mam.data$contra
## contrasts
## # build the gene set collections
## gs.annots = buildIdxEZID(entrezIDs=rownames(v$E), species="mouse",
##         msigdb.gsets = "c2",
##         kegg.exclude = "all")
## names(gs.annots)
## # create Entrez IDs - Symbols map
## symbolsMap = v$genes[,c(1,3)]
## colnames(symbolsMap) = c("FeatureID", "Symbols")
## symbolsMap[, "Symbols"] = as.character(symbolsMap[, "Symbols"])
## # replace NA Symbols with IDs
## na.sym = is.na(symbolsMap[, "Symbols"])
## symbolsMap[na.sym, "Symbols"] = symbolsMap[na.sym, "FeatureID"]
## # perform the EGSEA analysis
## # set report = TRUE to generate the EGSEA interactive report
## gsa = egsea(voom.results=v, contrasts=contrasts, gs.annots=gs.annots,
##         symbolsMap=symbolsMap, baseGSEAs=egsea.base()[-c(2,5,6,9)],
##         display.top= 20, sort.by="med.rank",
##         egsea.dir="./mam-egsea-report", num.threads=4, report=FALSE)	
## # show top 20 comparative gene sets in C2 collection
## topSets(gsa, contrast="comparison", gs.label="c2", number = 20)

## ----echo=TRUE,  eval=FALSE,tidy=TRUE,tidy.opts=list(blank=FALSE, width.cutoff=60)----
## # load the count matrix and other relevant data
## library(EGSEAdata)
## data(il13.data.cnt)
## cnt = il13.data.cnt$counts
## group = il13.data.cnt$group
## group
## design = il13.data.cnt$design
## contrasts = il13.data.cnt$contra
## genes = il13.data.cnt$genes
## # build the gene set collections
## gs.annots = buildIdxEZID(entrezIDs=rownames(cnt), species="human", msigdb.gsets="none",
##  			 kegg.exclude = c("Metabolism"))
## # perform the EGSEA analysis
## # set report = TRUE to generate the EGSEA interactive report
## gsa = egsea.cnt(counts=cnt, group=group, design=design, contrasts=contrasts,
##         gs.annots=gs.annots, symbolsMap=genes, baseGSEAs=egsea.base()[-2], display.top = 5,
##  	    sort.by="avg.rank", egsea.dir="./il13-egsea-cnt-report", num.threads = 4, report=FALSE)

## ----echo=TRUE, eval=TRUE, tidy=TRUE, tidy.opts=list(blank=FALSE, width.cutoff=60)----
# load IL-13 dataset
library(EGSEAdata)
data(il13.data)
voom.results = il13.data$voom
contrast = il13.data$contra
# find Differentially Expressed genes
library(limma)
vfit = lmFit(voom.results, voom.results$design)
vfit = contrasts.fit(vfit, contrast)
vfit = eBayes(vfit)
# select DE genes (Entrez IDs and logFC) at p-value <= 0.05 and |logFC| >= 1
top.Table = topTable(vfit, coef=1, number=Inf, p.value=0.05, lfc=1)
deGenes = as.character(top.Table$FeatureID)
logFC = top.Table$logFC
names(logFC) = deGenes
# build the gene set collection index 
gs.annots = buildIdxEZID(entrezIDs=deGenes, species="human", msigdb.gsets="none",
 			kegg.exclude = c("Metabolism"))
# perform the ORA analysis 
# set report = TRUE to generate the EGSEA interactive report
gsa = egsea.ora(entrezIDs=deGenes, universe= as.character(voom.results$genes[,1]),
				logFC =logFC, title="X24IL13-X24",  gs.annots=gs.annots, 
  			    symbolsMap=top.Table[, c(1,2)], display.top = 5,          
  			    egsea.dir="./il13-egsea-ora-report", num.threads = 4, report=FALSE)


## ----echo=TRUE, eval=TRUE, tidy=TRUE, tidy.opts=list(blank=FALSE, width.cutoff=60)----
library(EGSEAdata)
data(il13.data)
v = il13.data$voom
# load KEGG pathways
data(kegg.pathways)
# select 50 pathways
gsets = kegg.pathways$human$kg.sets[1:50]
gsets[1]
# build custom gene set collection using these 50 pathways
gs.annots = buildCustomIdxEZID(entrezIDs=rownames(v$E), gsets= gsets, species="human")
names(gs.annots)
colnames(gs.annots$anno)

