
## ----setup, include=FALSE, cache=FALSE-----------------------------------
library(knitr)
## set global chunk options
opts_chunk$set(fig.path='figure/manual-', cache.path='cache/manual-', fig.align='center', fig.show='hold', par=TRUE)
## I use = but I can replace it with <-; set code/output width to be 68
options(replace.assign=TRUE, width=68, digits=4)
## tune details of base graphics (http://yihui.name/knitr/hooks)
knit_hooks$set(par=function(before, options, envir){
if (before && options$fig.show!='none') par(mar=c(4,4,.1,.1),cex.lab=.95,cex.axis=.9,mgp=c(2,.7,0),tcl=-.3)
})


## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"------------------
BiocStyle::latex()


## ----loadTFBSTools, echo=FALSE, warning=FALSE----------------------------
library("TFBSTools")


## ----PFMatrix, eval=TRUE, cache=TRUE, results = "markup"-----------------
## PFMatrix construction; Not all of the slots need to be initialised.
pfm = PFMatrix(ID="MA0004.1", name="Arnt", matrixClass="Zipper-Type", strand="+",
        bg=c(A=0.25, C=0.25, G=0.25, T=0.25),
        tags=list(family="Helix-Loop-Helix", species="10090", 
                  tax_group="vertebrates",medline="7592839", type="SELEX", 
                  ACC="P53762", pazar_tf_id="TF0000003",
                  TFBSshape_ID="11", TFencyclopedia_ID="580"),
        profileMatrix=matrix(c(4L,  19L, 0L,  0L,  0L,  0L,
                               16L, 0L,  20L, 0L,  0L,  0L,
                               0L,  1L,  0L,  20L, 0L,  20L,
                               0L,  0L,  0L,  0L,  20L, 0L),
                             byrow=TRUE, nrow=4, 
                             dimnames=list(c("A", "C", "G", "T")))
        )

pfm 

## coerced to matrix
as.matrix(pfm)

## access the slots of pfm
ID(pfm)
name(pfm)
Matrix(pfm)
ncol(pfm)
length(pfm)

## convert a FPM to PWM, ICM
pwm = toPWM(pfm, type="log2probratio", pseudocounts=0.8,
            bg=c(A=0.25, C=0.25, G=0.25, T=0.25))

icm = toICM(pfm, pseudocounts=sqrt(rowSums(pfm)[1]), schneider=FALSE,
            bg=c(A=0.25, C=0.25, G=0.25, T=0.25))

## get the reverse complment matrix with all the same information except the strand.
pwmRevComp <- reverseComplement(pwm)


## ----PFMatrixList, eval=TRUE, results ="markup", cache=TRUE--------------
pfm2 = pfm
pfmList = PFMatrixList(pfm1=pfm, pfm2=pfm2, use.names=TRUE)
pfmList
names(pfmList)


## ----searchDB, eval=TRUE, results = "markup", cache=TRUE-----------------
library(JASPAR2014)
opts = list()
opts[["species"]] = 9606
opts[["name"]] = "RUNX1"
#opts[["class"]] = "Ig-fold"
opts[["type"]] = "SELEX"
opts[["all_versions"]] = TRUE
PFMatrixList = getMatrixSet(JASPAR2014, opts)
PFMatrixList

opts2 = list()
opts2[["type"]] = "SELEX"
PFMatrixList2 = getMatrixSet(JASPAR2014, opts2)
PFMatrixList2


## ----operateDb, eval=FALSE, results="hide"-------------------------------
## db = "myMatrixDb.sqlite"
## initializeJASPARDB(db)
## storeMatrix(db, pfm)
## deleteMatrixHavingID(db, "MA0003")


## ----PWMmatrixMethods, eval=TRUE, results = "markup", cache=TRUE---------
pwm = toPWM(pfm, pseudocounts=0.8)
pwm


## ----ICMmatrixMethods, eval=TRUE, results = "markup", cache=TRUE---------
icm = toICM(pfm, pseudocounts=0.8, schneider=TRUE)
icm


## ----seqLogo1, fig.show="hold", fig.width=6, fig.height=4, dpi=300-------
seqLogo(icm)


## ----rtl-PFMSimi, eval=TRUE, results = "markup", cache=TRUE--------------
## one to one comparison
data(MA0003.2)
data(MA0004.1)
pfmSubject = MA0003.2
pfmQuery =  MA0004.1
PFMSimilarity(pfmSubject, pfmQuery)

## one to several comparsion
PFMSimilarity(pfmList, pfmQuery)

## align IUPAC string
IUPACString = "ACGTMRWSYKVHDBN"
PFMSimilarity(pfmList, IUPACString)


## ----rtl-PWMSimilarity, eval=TRUE, results = "markup", cache=TRUE--------
  data(MA0003.2)
  data(MA0004.1)
  pwm1 = toPWM(MA0003.2, type="prob")
  pwm2 = toPWM(MA0004.1, type="prob")
  PWMSimilarity(pwm1, pwm2, method="Euclidean")
  PWMSimilarity(pwm1, pwm2, method="Pearson")
  PWMSimilarity(pwm1, pwm2, method="KL")


## ----permuteMatrix, eval=TRUE, results = "markup", cache=TRUE------------
## Matrice permutation
permuteMatrix(pfmQuery)
permuteMatrix(pfmList, type="intra")
permuteMatrix(pfmList, type="inter")


## ----samplingMatrix, eval=FALSE, results = "markup", cache=TRUE----------
## ## Dirichlet model training
## data(MA0003.2)
## data(MA0004.1)
## pfmList <- PFMatrixList(pfm1=MA0003.2, pfm2=MA0004.1, use.names=TRUE)
## dmmParameters <- dmmEM(pfmList, K=6, alg="C")
## ## Matrice sampling from trained Dirichlet model
## pwmSampled <- rPWMDmm(MA0003.2, dmmParameters$alpha0, dmmParameters$pmix,
##                       N=1, W=6)


## ----searchSeq, eval=TRUE, results = "markup", cache=TRUE----------------
library(Biostrings)
data(MA0003.2)
data(MA0004.1)
pwmList = PWMatrixList(MA0003.2=toPWM(MA0003.2), MA0004.1=toPWM(MA0004.1), 
                       use.names=TRUE)
subject = DNAString("GAATTCTCTCTTGTTGTAGTCTCTTGACAAAATG")
siteset = searchSeq(pwm, subject, seqname="seq1", min.score="60%", strand="*")

sitesetList = searchSeq(pwmList, subject, seqname="seq1",
                        min.score="60%", strand="*")


## generate gff2 or gff3 style output
head(writeGFF3(siteset))
head(writeGFF3(sitesetList))
head(writeGFF2(siteset))

## get the relative scores
relScore(siteset)
relScore(sitesetList)

## calculate the empirical p-values of the scores
pvalues(siteset, type="TFMPvalue")
pvalues(siteset, type="sampling")


## ----searchAln, eval=TRUE, results = "markup", cache=TRUE----------------
library(Biostrings)
data(MA0003.2)
pwm <- toPWM(MA0003.2)
aln1 <- DNAString("ACTTCACCAGCTCCCTGGCGGTAAGTTGATC---AAAGG---AAACGCAAAGTTTTCAAG")
aln2 <- DNAString("GTTTCACTACTTCCTTTCGGGTAAGTAAATATATAAATATATAAAAATATAATTTTCATC")
sitePairSet <- searchAln(pwm, aln1, aln2, seqname1="seq1", seqname2="seq2",
                         min.score="50%", cutoff=0.5, 
                         strand="*", type="any")
## generate gff style output
head(writeGFF3(sitePairSet))
head(writeGFF2(sitePairSet))

## search the Axt alignment
library(CNEr)
axtFilesHg19DanRer7 <- file.path(system.file("extdata", package="CNEr"),
                                 "hg19.danRer7.net.axt")
axtHg19DanRer7 <- readAxt(axtFilesHg19DanRer7)
sitePairSet <-  searchAln(pwm, axtHg19DanRer7, min.score="80%",
                          windowSize=51L, cutoff=0.7, strand="*",
                          type="any", conservation=NULL, mc.cores=2)
GRangesTFBS <- toGRangesList(sitePairSet, axtHg19DanRer7)
GRangesTFBS$targetTFBS
GRangesTFBS$queryTFBS


## ----searchBSgenome, eval=FALSE, results = "hide"------------------------
## library(rtracklayer)
## library(JASPAR2014)
## library(BSgenome.Hsapiens.UCSC.hg19)
## library(BSgenome.Mmusculus.UCSC.mm10)
## pfm <- getMatrixByID(JASPAR2014, ID="MA0004.1")
## pwm <- toPWM(pfm)
## chain <- import.chain("Downloads/hg19ToMm10.over.chain")
## sitePairSet <- searchPairBSgenome(pwm, BSgenome.Hsapiens.UCSC.hg19,
##                                  BSgenome.Mmusculus.UCSC.mm10,
##                                  chr1="chr1", chr2="chr1",
##                                  min.score="90%", strand="+", chain=chain)


## ----MEME-wrapper, eval=FALSE, results = "hide"--------------------------
## motifSet <- runMEME(file.path(system.file("extdata",
##                                           package="TFBSTools"), "crp0.s"),
##                     binary="meme",
##                     arguments=list("-nmotifs"=3)
##                    )
## ## Get the sites sequences and surrounding sequences
## sitesSeq(motifSet, type="all")
## ## Get the sites sequences only
## sitesSeq(motifSet, type="none")
## consensusMatrix(motifSet)


## ----session-info--------------------------------------------------------
  sessionInfo()


