## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## ----vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE----------------
## For links
library("BiocStyle")

## Track time spent on making the vignette
startTime <- Sys.time()

## Bib setup
library("knitcitations")

## Load knitcitations with a clean bibliography
cleanbib()
cite_options(hyperlink = "to.doc", citation_format = "text", style = "html")
# Note links won't show for now due to the following issue
# https://github.com/cboettig/knitcitations/issues/63

## Write bibliography information
bib <- c(
    R = citation(),
    AnnotationHub = citation("AnnotationHub"),
    benchmarkme = citation("benchmarkme"),
    BiocStyle = citation("BiocStyle"),
    cowplot = citation("cowplot"),
    DT = citation("DT"),
    ExperimentHub = citation("ExperimentHub"),
    fields = citation("fields"),
    ggplot2 = citation("ggplot2"),
    golem = citation("golem"),
    IRanges = citation("IRanges"),
    knitcitations = citation("knitcitations"),
    knitr = citation("knitr")[3],
    plotly = citation("plotly"),
    Polychrome = citation("Polychrome"),
    RColorBrewer = citation("RColorBrewer"),
    rmarkdown = citation("rmarkdown")[1],
    S4Vectors = citation("S4Vectors"),
    scater = citation("scater"),
    sessioninfo = citation("sessioninfo"),
    SingleCellExperiment = citation("SingleCellExperiment"),
    shiny = citation("shiny"),
    SummarizedExperiment = citation("SummarizedExperiment"),
    testthat = citation("testthat"),
    viridisLite = citation("viridisLite"),
    spatialLIBD = citation("spatialLIBD")[1],
    spatialLIBDpaper = citation("spatialLIBD")[2]
)

## ----'install', eval = FALSE--------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE)) {
#        install.packages("BiocManager")
#    }
#  
#  BiocManager::install("spatialLIBD")
#  
#  ## Check that you have a valid Bioconductor installation
#  BiocManager::valid()

## ----'citation'---------------------------------------------------------------
## Citation info
citation("spatialLIBD")

## ----setup, message = FALSE, warning = FALSE----------------------------------
library("spatialLIBD")

## ----'experiment_hub'---------------------------------------------------------
## Connect to ExperimentHub
ehub <- ExperimentHub::ExperimentHub()

## ----'download_data'----------------------------------------------------------
## Downloa small example sce data
if (!exists("sce")) sce <- fetch_data(type = "sce_example", eh = ehub)

## If you want to download the full real data (about 2.1 GB in RAM) use:
if (FALSE) {
    if (!exists("sce")) sce <- fetch_data(type = "sce", eh = ehub)
}

## Query ExperimentHub and download the data
if (!exists("sce_layer")) sce_layer <- fetch_data(type = "sce_layer", eh = ehub)
modeling_results <- fetch_data("modeling_results", eh = ehub)

## ----'explore data'-----------------------------------------------------------
## spot-level data
sce

## layer-level data
sce_layer

## list of modeling result tables
sapply(modeling_results, class)
sapply(modeling_results, dim)
sapply(modeling_results, function(x) {
      head(colnames(x))
  })

## ----'create_sig_genes'-------------------------------------------------------
## Convert to a long format the modeling results
## This takes a few seconds to run
system.time(
    sig_genes <-
        sig_genes_extract_all(
            n = nrow(sce_layer),
            modeling_results = modeling_results,
            sce_layer = sce_layer
        )
)

## Explore the result
class(sig_genes)
dim(sig_genes)

## -----------------------------------------------------------------------------
if (interactive()) {
    run_app(
        sce = sce,
        sce_layer = sce_layer,
        modeling_results = modeling_results,
        sig_genes = sig_genes
    )
}

## ----'sce_image_clus', fig.small = TRUE---------------------------------------
## View our LIBD layers for one sample
sce_image_clus(
    sce = sce,
    clustervar = "layer_guess_reordered",
    sampleid = "151673",
    colors = libd_layer_colors,
    ... = " LIBD Layers"
)

## ----'sce_image_clus_variables'-----------------------------------------------
## This is not fully precise, but gives you a rough idea
## Some integer columns are actually continuous variables
table(sapply(colData(sce), class) %in% c("factor", "integer"))

## This is more precise (one cluster has 28 unique values)
table(sapply(colData(sce), function(x) length(unique(x))) < 29)

## ----'sce_image_clus_no_spatial', fig.small = TRUE----------------------------
## View our LIBD layers for one sample
## without spatial information
sce_image_clus(
    sce = sce,
    clustervar = "layer_guess_reordered",
    sampleid = "151673",
    colors = libd_layer_colors,
    ... = " LIBD Layers",
    spatial = FALSE
)

## ----'libd_layer_colors'------------------------------------------------------
## Color palette designed by Lukas M. Weber with feedback from the team.
libd_layer_colors

## ----'sce_image_gene', fig.small = TRUE---------------------------------------
## Available gene expression assays
assayNames(sce)

## Not all of these make sense to visualize
## In particular, barcode, row, col, imagerow, imagecol, key are not
## useful to visualize.
colnames(colData(sce))[sapply(colData(sce), function(x) {
      length(unique(x))
  }) > 29]

## Visualize a gene
sce_image_gene(
    sce = sce,
    sampleid = "151673",
    viridis = FALSE
)

## Visualize the estimated number of cells per spot
sce_image_gene(
    sce = sce,
    sampleid = "151673",
    geneid = "cell_count"
)

## Visualize the fraction of chrM expression per spot
## without the spatial layer
sce_image_gene(
    sce = sce,
    sampleid = "151673",
    geneid = "expr_chrM_ratio",
    spatial = FALSE
)

## ----'sig_genes_detail'-------------------------------------------------------
head(sig_genes)

## ----'layer_boxplot'----------------------------------------------------------
## Note that we recommend setting the random seed so the jittering of the
## points will be reproducible. Given the requirements by BiocCheck this
## cannot be done inside the layer_boxplot() function.

## Create a boxplot of the first gene in `sig_genes`.
set.seed(20200206)
layer_boxplot(sig_genes = sig_genes, sce_layer = sce_layer)

## Viridis colors displayed in the shiny app
## showing the first pairwise model result
## (which illustrates the background colors used for the layers not
## involved in the pairwise comparison)
set.seed(20200206)
layer_boxplot(
    i = which(sig_genes$model_type == "pairwise")[1],
    sig_genes = sig_genes,
    sce_layer = sce_layer,
    col_low_box = viridisLite::viridis(4)[2],
    col_low_point = viridisLite::viridis(4)[1],
    col_high_box = viridisLite::viridis(4)[3],
    col_high_point = viridisLite::viridis(4)[4]
)

## Paper colors displayed in the shiny app
set.seed(20200206)
layer_boxplot(
    sig_genes = sig_genes,
    sce_layer = sce_layer,
    short_title = FALSE,
    col_low_box = "palegreen3",
    col_low_point = "springgreen2",
    col_high_box = "darkorange2",
    col_high_point = "orange1"
)

## ----'matt_t_stats'-----------------------------------------------------------
## Explore the enrichment t-statistics derived from Tran et al's snRNA-seq
## DLPFC data
dim(tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer)
tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer[seq_len(3), seq_len(6)]

## ----'layer_stat_cor'---------------------------------------------------------
## Compute the correlation matrix of enrichment t-statistics between our data
## and Tran et al's snRNA-seq data
cor_stats_layer <- layer_stat_cor(
    tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer,
    modeling_results,
    "enrichment"
)

## Explore the correlation matrix
head(cor_stats_layer[, seq_len(3)])

## ----'layer_stat_cor_plot', fig.wide = TRUE-----------------------------------
## Visualize the correlation matrix
layer_stat_cor_plot(cor_stats_layer, max = max(cor_stats_layer))

## ----'read_sfari'-------------------------------------------------------------
## Read in the SFARI gene sets included in the package
asd_sfari <- utils::read.csv(
    system.file(
        "extdata",
        "SFARI-Gene_genes_01-03-2020release_02-04-2020export.csv",
        package = "spatialLIBD"
    ),
    as.is = TRUE
)

## Format them appropriately
asd_sfari_geneList <- list(
    Gene_SFARI_all = asd_sfari$ensembl.id,
    Gene_SFARI_high = asd_sfari$ensembl.id[asd_sfari$gene.score < 3],
    Gene_SFARI_syndromic = asd_sfari$ensembl.id[asd_sfari$syndromic == 1]
)

## ----'sfari_enrichment'-------------------------------------------------------
## Compute the gene set enrichment results
asd_sfari_enrichment <- gene_set_enrichment(
    gene_list = asd_sfari_geneList,
    modeling_results = modeling_results,
    model_type = "enrichment"
)

## Explore the results
head(asd_sfari_enrichment)

## ----'sfari_enrichment_plot'--------------------------------------------------
## Visualize gene set enrichment results
gene_set_enrichment_plot(
    asd_sfari_enrichment,
    xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)),
    layerHeights = c(0, 40, 55, 75, 85, 110, 120, 135)
)

## ----'required_structure'-----------------------------------------------------
## Images data stored under metadata(sce)$image
head(metadata(sce)$image)

## Ensembl gene IDs are row names with the symbol (gene_name) and Ensembl
## ID (gene_name) pasted into `gene_search`
## gene_search <- paste0(gene_name, '; ', gene_id)
rowData(sce)[, c("gene_id", "gene_name", "gene_search")]
## This information also has to be present in sce_layer
stopifnot(all(
    c("gene_id", "gene_name", "gene_search") %in% colnames(rowData(sce_layer))
))


## Information about the image coordinates stored in imagerow and imagecol
colData(sce)[, c("imagerow", "imagecol")]

## The sample names stored under sce$sample_name
stopifnot("sample_name" %in% colnames(colData(sce)))

## A unique spot-level ID (such as barcode) stored under sce$key
## The 'key' column is necessary for the plotly code to work.
stopifnot("key" %in% colnames(colData(sce)))
stopifnot(length(unique(sce$key)) == ncol(sce))
## Here's how we made those unique keys
identical(sce$key, paste0(sce$sample_name, "_", sce$barcode))

## Some cluster variables that you can specify with run_app().
## For example:
cluster_variables <- c(
    "GraphBased",
    "Layer",
    "Maynard",
    "Martinowich",
    paste0("SNN_k50_k", 4:28),
    "layer_guess_reordered_short",
    "SpatialDE_PCA",
    "SpatialDE_pool_PCA",
    "HVG_PCA",
    "pseudobulk_PCA",
    "markers_PCA",
    "SpatialDE_UMAP",
    "SpatialDE_pool_UMAP",
    "HVG_UMAP",
    "pseudobulk_UMAP",
    "markers_UMAP",
    "SpatialDE_PCA_spatial",
    "SpatialDE_pool_PCA_spatial",
    "HVG_PCA_spatial",
    "pseudobulk_PCA_spatial",
    "markers_PCA_spatial",
    "SpatialDE_UMAP_spatial",
    "SpatialDE_pool_UMAP_spatial",
    "HVG_UMAP_spatial",
    "pseudobulk_UMAP_spatial",
    "markers_UMAP_spatial"
)
stopifnot(all(cluster_variables %in% colnames(colData(sce))))
## The last one gets renamed to spatialLIBD in spatialLIBD::app_server()

## Some continuous variables that you can specify in run_app()
## as well as the already mentioned rowData(sce)$gene_search
## For example:
continuous_variables <- c(
    "cell_count",
    "sum_umi",
    "sum_gene",
    "expr_chrM",
    "expr_chrM_ratio"
)
stopifnot(all(continuous_variables %in% colnames(colData(sce))))

## None of the values of rowData(sce)$gene_search should be re-used in
## colData(sce)
stopifnot(!all(rowData(sce)$gene_search %in% colnames(colData(sce))))

## No column named COUNT as that's used by sce_image_gene_p() and related
## functions.
stopifnot(!"COUNT" %in% colnames(colData(sce)))

## The counts and logcounts assays
stopifnot(all(c("counts", "logcounts") %in% assayNames(sce)))

## For the layer-level data we also need layer_guess_reordered
## although this is a parameter you can change in run_app()
stopifnot("layer_guess_reordered" %in% colnames(colData(sce_layer)))

stopifnot(all(
    c("enrichment", "pairwise", "anova") %in% names(modeling_results)
))

## All the model tables contain the ensembl column
stopifnot(all(sapply(modeling_results, function(x) {
      "ensembl" %in% colnames(x)
  })))

## The following column prefixes are present in each of the model tables
expected_column_name_starts <-
    c("^[f|t]_stat_", "^p_value_", "^fdr_")
stopifnot(all(sapply(modeling_results, function(model_table) {
    all(sapply(expected_column_name_starts, function(expected_prefix) {
          any(grepl(
              expected_prefix, colnames(model_table)
          ))
      }))
})))

## ----'run_check_functions'----------------------------------------------------
check_sce(sce)
check_sce_layer(sce_layer)
## The output here is too long to print
xx <- check_modeling_results(modeling_results)
identical(xx, modeling_results)

## ----createVignette, eval=FALSE-----------------------------------------------
#  ## Create the vignette
#  library("rmarkdown")
#  system.time(render("spatialLIBD.Rmd"))
#  
#  ## Extract the R code
#  library("knitr")
#  knit("spatialLIBD.Rmd", tangle = TRUE)

## ----reproduce1, echo=FALSE---------------------------------------------------
## Date the vignette was generated
Sys.time()

## ----reproduce2, echo=FALSE---------------------------------------------------
## Processing time in seconds
totalTime <- diff(c(startTime, Sys.time()))
round(totalTime, digits = 3)

## ----reproduce3, echo=FALSE-------------------------------------------------------------------------------------------
## Session info
library("sessioninfo")
options(width = 120)
session_info()

## ----vignetteBiblio, results = 'asis', echo = FALSE, warning = FALSE, message = FALSE---------------------------------
## Print bibliography
bibliography()