## ---- eval=FALSE--------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE)) {
#    install.packages("BiocManager")
#  }
#  BiocManager::install("nnSVG")

## ---- message=FALSE-----------------------------------------------------------
library(SpatialExperiment)
library(STexampleData)
library(scran)
library(nnSVG)
library(ggplot2)

## -----------------------------------------------------------------------------
# load example dataset from STexampleData package
spe <- Visium_humanDLPFC()

dim(spe)

## -----------------------------------------------------------------------------
# preprocessing steps

# keep only spots over tissue
spe <- spe[, colData(spe)$in_tissue == 1]
dim(spe)

# skip spot-level quality control, since this has been performed previously 
# on this dataset

# filter low-expressed and mitochondrial genes
# using default filtering parameters
spe <- filter_genes(spe)

# calculate logcounts (log-transformed normalized counts) using scran package
# using library size factors
spe <- computeLibraryFactors(spe)
spe <- logNormCounts(spe)

assayNames(spe)

## -----------------------------------------------------------------------------
# select small set of random genes and several known SVGs for 
# faster runtime in this example
set.seed(123)
ix_random <- sample(seq_len(nrow(spe)), 10)

known_genes <- c("MOBP", "PCP4", "SNAP25", "HBB", "IGKC", "NPY")
ix_known <- which(rowData(spe)$gene_name %in% known_genes)

ix <- c(ix_known, ix_random)

spe <- spe[ix, ]
dim(spe)

## -----------------------------------------------------------------------------
# run nnSVG

# set seed for reproducibility
set.seed(123)
# using a single thread in this example
spe <- nnSVG(spe)

# show results
rowData(spe)

## -----------------------------------------------------------------------------
# number of significant SVGs
table(rowData(spe)$padj <= 0.05)

## -----------------------------------------------------------------------------
# show results for top n SVGs
rowData(spe)[order(rowData(spe)$rank)[1:10], ]

## ---- fig.width=5, fig.height=5-----------------------------------------------
# plot spatial expression of top-ranked SVG
ix <- which(rowData(spe)$rank == 1)
ix_name <- rowData(spe)$gene_name[ix]
ix_name

df <- as.data.frame(
  cbind(spatialCoords(spe), 
        expr = counts(spe)[ix, ]))

ggplot(df, aes(x = pxl_col_in_fullres, y = pxl_row_in_fullres, color = expr)) + 
  geom_point(size = 0.8) + 
  coord_fixed() + 
  scale_y_reverse() + 
  scale_color_gradient(low = "gray90", high = "blue", 
                       trans = "sqrt", breaks = range(df$expr), 
                       name = "counts") + 
  ggtitle(ix_name) + 
  theme_bw() + 
  theme(plot.title = element_text(face = "italic"), 
        panel.grid = element_blank(), 
        axis.title = element_blank(), 
        axis.text = element_blank(), 
        axis.ticks = element_blank())

## ---- message=FALSE-----------------------------------------------------------
library(SpatialExperiment)
library(STexampleData)
library(scran)
library(nnSVG)
library(ggplot2)

## -----------------------------------------------------------------------------
# load example dataset from STexampleData package
spe <- SlideSeqV2_mouseHPC()

dim(spe)

## -----------------------------------------------------------------------------
# preprocessing steps

# remove spots with NA cell type labels
spe <- spe[, !is.na(colData(spe)$celltype)]
dim(spe)

# check cell type labels
table(colData(spe)$celltype)

# filter low-expressed and mitochondrial genes
# using adjusted filtering parameters for this platform
spe <- filter_genes(
  spe, 
  filter_genes_ncounts = 1, 
  filter_genes_pcspots = 1, 
  filter_mito = TRUE
)

dim(spe)

# calculate log-transformed normalized counts using scran package
# using library size normalization
spe <- computeLibraryFactors(spe)
spe <- logNormCounts(spe)

assayNames(spe)

## -----------------------------------------------------------------------------
# select small set of random genes and several known SVGs for 
# faster runtime in this example
set.seed(123)
ix_random <- sample(seq_len(nrow(spe)), 10)

known_genes <- c("Cpne9", "Rgs14")
ix_known <- which(rowData(spe)$gene_name %in% known_genes)

ix <- c(ix_known, ix_random)

spe <- spe[ix, ]
dim(spe)

## -----------------------------------------------------------------------------
# run nnSVG with covariates

# create model matrix for cell type labels
X <- model.matrix(~ colData(spe)$celltype)
dim(X)
stopifnot(nrow(X) == ncol(spe))

# set seed for reproducibility
set.seed(123)
# using a single thread in this example
spe <- nnSVG(spe, X = X)

# show results
rowData(spe)

## -----------------------------------------------------------------------------
# create model matrix after dropping empty factor levels
X <- model.matrix(~ droplevels(as.factor(colData(spe)$celltype)))

## -----------------------------------------------------------------------------
# number of significant SVGs
table(rowData(spe)$padj <= 0.05)

## -----------------------------------------------------------------------------
# show results for top n SVGs
rowData(spe)[order(rowData(spe)$rank)[1:10], ]

## ---- fig.width=5, fig.height=3.25--------------------------------------------
# plot spatial expression of top-ranked SVG
ix <- which(rowData(spe)$rank == 1)
ix_name <- rowData(spe)$gene_name[ix]
ix_name

df <- as.data.frame(
  cbind(spatialCoords(spe), 
        expr = counts(spe)[ix, ]))

ggplot(df, aes(x = xcoord, y = ycoord, color = expr)) + 
  geom_point(size = 0.1) + 
  coord_fixed() + 
  scale_color_gradient(low = "gray90", high = "blue", 
                       trans = "sqrt", breaks = range(df$expr), 
                       name = "counts") + 
  ggtitle(ix_name) + 
  theme_bw() + 
  theme(plot.title = element_text(face = "italic"), 
        panel.grid = element_blank(), 
        axis.title = element_blank(), 
        axis.text = element_blank(), 
        axis.ticks = element_blank())

## -----------------------------------------------------------------------------
sessionInfo()

