## ---- eval=TRUE, message=FALSE, warning=FALSE, echo=FALSE------------------
library(COCOA)
library(ggplot2)
data("brcaPCScores657")
ggplot(data = brcaPCScores657, mapping = aes(x=PC1, y=PC4, col=ER_Status)) + geom_point() + ggtitle("PCA of DNA methylation data from breast cancer patients") + theme_classic()

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
library(COCOA)
library(data.table)
library(ggplot2)
data("esr1_chr1")
data("gata3_chr1")
data("nrf1_chr1")
data("atf3_chr1")
data("brcaLoadings1")
data("brcaMCoord1")
data("brcaPCScores")
data("brcaMethylData1")

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
# prepare data
GRList <- GRangesList(esr1_chr1, gata3_chr1, nrf1_chr1, atf3_chr1) 
regionSetName <- c("esr1_chr1", "gata3_chr1", "nrf1_chr1", "atf3_chr1")

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
PCsToAnnotate <- paste0("PC", 1:4)
regionSetScores <- runCOCOA(loadingMat=brcaLoadings1, 
                            signalCoord=brcaMCoord1, 
                            GRList=GRList, 
                            PCsToAnnotate=PCsToAnnotate, 
                            scoringMetric="regionMean")
regionSetScores$regionSetName <- regionSetName

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
rsScoreHeatmap(regionSetScores, 
               PCsToAnnotate=paste0("PC", 1:4), 
               rsNameCol = "regionSetName",
               orderByPC = "PC1", 
               column_title = "Region sets ordered by score for PC1")

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
rsScoreHeatmap(regionSetScores, 
               PCsToAnnotate=paste0("PC", 1:4),
               rsNameCol = "regionSetName",
               orderByPC = "PC2", 
               column_title = "Region sets ordered by score for PC2")

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
wideGRList <- lapply(GRList, resize, width=14000, fix="center")
loadProfile <- lapply(wideGRList, function(x) getLoadingProfile(loadingMat=brcaLoadings1,
                                                                signalCoord=brcaMCoord1,
                                                                regionSet=x, 
                                                                PCsToAnnotate=PCsToAnnotate,
                                                                binNum=21))

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
# average loading value from each PC to normalize so PCs can be compared with each other
avLoad <- apply(X=brcaLoadings1[, PCsToAnnotate], 
                MARGIN=2, 
                FUN=function(x) mean(abs(x)))

# normalize
loadProfile <- lapply(loadProfile, 
                      FUN=function(x) as.data.frame(mapply(FUN = function(y, z) x[, y] - z, 
                                                           y=PCsToAnnotate, z=avLoad)))
binID = 1:nrow(loadProfile[[1]])
loadProfile <- lapply(loadProfile, FUN=function(x) cbind(binID, x))

# for the plot scale
maxVal <- max(sapply(loadProfile, FUN=function(x) max(x[, PCsToAnnotate])))
minVal <- min(sapply(loadProfile, FUN=function(x) min(x[, PCsToAnnotate])))

# convert to long format for plots
loadProfile <- lapply(X=loadProfile, FUN=function(x) tidyr::gather(data=x, key="PC", value="loading_value", PCsToAnnotate))
loadProfile <- lapply(loadProfile, 
                      function(x){x$PC <- factor(x$PC, levels=PCsToAnnotate); return(x)})

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
wrapper <- function(x, ...) paste(strwrap(x, ...), collapse="\n") 
profilePList <- list()
for (i in seq_along(loadProfile)) {
    
    thisRS <- loadProfile[[i]]
    
    profilePList[[i]] <- ggplot(data=thisRS, 
                                mapping=aes(x=binID , y=loading_value)) + 
        geom_line() + ylim(c(minVal, maxVal)) + facet_wrap(facets="PC") + 
        ggtitle(label=wrapper(regionSetName[i], width=30)) + 
        xlab("Genome around region set, 14 kb") + 
        ylab("Normalized loading value") + 
        theme(panel.grid.major.x=element_blank(), 
              panel.grid.minor.x=element_blank(), 
              axis.text.x=element_blank(), 
              axis.ticks.x=element_blank())
    profilePList[[i]]

}
profilePList[[1]]
profilePList[[2]]
profilePList[[3]]
profilePList[[4]]

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
signalAlongPC(genomicSignal=brcaMethylData1,
                signalCoord=brcaMCoord1,
                regionSet=esr1_chr1,
                pcScores=brcaPCScores,
                orderByPC="PC1", cluster_columns=TRUE,
                column_title = "Individual cytosine/CpG",
                name = "DNA methylation level")

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
signalAlongPC(genomicSignal=brcaMethylData1,
              signalCoord=brcaMCoord1,
              regionSet=nrf1_chr1,
              pcScores=brcaPCScores,
              orderByPC="PC1", 
              cluster_columns=TRUE, 
              column_title = "Individual cytosine/CpG",
              name = "DNA methylation level")

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
regionQuantileByPC(loadingMat = brcaLoadings1,
                                signalCoord = brcaMCoord1,
                                regionSet = esr1_chr1,
                                rsName = "Estrogen receptor (chr1)",
                                PCsToAnnotate=paste0("PC", 1:4),
                                maxRegionsToPlot = 8000,
                                cluster_rows = TRUE,
                                cluster_columns = FALSE,
                                column_title = rsName,
                                name = "Percentile of loading scores in PC")

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
regionQuantileByPC(loadingMat = brcaLoadings1,
                                signalCoord = brcaMCoord1,
                                regionSet = nrf1_chr1,
                                rsName = "Nrf1 (chr1)",
                                PCsToAnnotate=paste0("PC", 1:4),
                                maxRegionsToPlot = 8000,
                                cluster_rows = TRUE,
                                cluster_columns = FALSE,
                                column_title = rsName,
                                name = "Percentile of loading scores in PC")

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
toyData = data.frame(F1 = 1:5, F2 = c(2, 0, 6, 4, 3), F3 = c(1, 3, 2, 10, 5))

## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------
prcomp(toyData)$rotation

## ---- eval=FALSE, message=FALSE, warning=FALSE-----------------------------
#  PC1 = 0.319 * F1 + 0.201 * F2 + 0.926 * F3
#  PC2 = -0.178 * F1 + -0.947 * F2 + 0.267 * F3
#  PC3 = -0.931 * F1 + 0.250 * F2 + 0.267 * F3

