## ----setup, include = FALSE------------------------------------------------
# Document-wide knitr chunk defaults: collapsed output, with "#>" as the
# prefix on printed output lines.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ---- message=FALSE--------------------------------------------------------
# The dataset package
library(TENxBUSData)
library(BUSpaRse)
library(Matrix)
library(DropletUtils)
library(zeallot)
library(ggplot2)

## --------------------------------------------------------------------------
# Fetch the "hgmm100" dataset through the TENxBUSData package, passing the
# current directory as the location argument. Later chunks read from
# ./out_hgmm100.
TENxBUSData(
  ".",
  dataset = "hgmm100"
)

## --------------------------------------------------------------------------
# Transcript-to-gene table (`tr2g`) for the two species named below, using
# Ensembl version 94 and the kallisto output under ./out_hgmm100.
tr2g <- transcript2gene(
  species = c("Homo sapiens", "Mus musculus"),
  type = "vertebrate",
  kallisto_out_path = "./out_hgmm100",
  ensembl_version = 94
)

## --------------------------------------------------------------------------
# Preview the first rows of the table.
head(tr2g)

## --------------------------------------------------------------------------
# make_sparse_matrix() reads the sorted text file under ./out_hgmm100; its
# result is unpacked by zeallot's %<-% into `gene_count` and `tcc`, in that
# order. The gene-count estimate is taken from the number of rows of `tr2g`.
c(gene_count, tcc) %<-% make_sparse_matrix(
  "./out_hgmm100/output.sorted.txt",
  tr2g = tr2g,
  est_ncells = 1e5,
  est_ngenes = nrow(tr2g),
  ncores = 2
)

## --------------------------------------------------------------------------
# Dimensions of the gene count matrix before any filtering.
dim(gene_count)

## --------------------------------------------------------------------------
# Per-column totals of the gene count matrix, and their distribution.
tot_counts <- Matrix::colSums(gene_count)
summary(tot_counts)

## --------------------------------------------------------------------------
# Barcode rank statistics for the gene count matrix. The result carries
# `rank` and `total` columns plus `knee` and `inflection` values in its
# metadata, all of which are used by the plot below.
bc_rank <- barcodeRanks(gene_count)

## --------------------------------------------------------------------------
# Rank vs. total count on log-log axes, with dashed horizontal lines at the
# knee (blue) and inflection (green) thresholds.
# Built with ggplot() + geom_line() instead of qplot(): qplot() is deprecated
# as of ggplot2 3.4.0 and emits a deprecation warning.
ggplot(
  data.frame(rank = bc_rank$rank, total = bc_rank$total),
  aes(x = rank, y = total)
) +
  geom_line() +
  geom_hline(
    yintercept = metadata(bc_rank)$knee,
    color = "blue", linetype = 2
  ) +
  geom_hline(
    yintercept = metadata(bc_rank)$inflection,
    color = "green", linetype = 2
  ) +
  annotate(
    "text",
    x = 1000,
    # Scale by 1.5 so each label sits just above its line on the log axis.
    y = 1.5 * c(metadata(bc_rank)$knee, metadata(bc_rank)$inflection),
    label = c("knee", "inflection"),
    color = c("blue", "green")
  ) +
  scale_x_log10() +
  scale_y_log10() +
  labs(x = "Rank", y = "Total UMI counts") +
  theme_bw()

## --------------------------------------------------------------------------
# Keep only the columns whose total count exceeds the inflection threshold.
# drop = FALSE keeps the result two-dimensional even when exactly one column
# passes; without it the subset collapses to a plain vector and dim() below
# would return NULL.
gene_count <- gene_count[
  ,
  tot_counts > metadata(bc_rank)$inflection,
  drop = FALSE
]
dim(gene_count)

## --------------------------------------------------------------------------
# Dimensions of the TCC matrix before any filtering.
dim(tcc)

## --------------------------------------------------------------------------
# Per-column totals of the TCC matrix. This overwrites the `tot_counts`
# computed earlier from `gene_count`.
tot_counts <- Matrix::colSums(tcc)
summary(tot_counts)

## --------------------------------------------------------------------------
# Barcode rank statistics for the TCC matrix. This overwrites the `bc_rank`
# computed earlier from `gene_count`.
bc_rank <- barcodeRanks(tcc)

## --------------------------------------------------------------------------
# Rank vs. total count on log-log axes, with dashed horizontal lines at the
# knee (blue) and inflection (green) thresholds.
# Built with ggplot() + geom_line() instead of qplot(): qplot() is deprecated
# as of ggplot2 3.4.0 and emits a deprecation warning.
ggplot(
  data.frame(rank = bc_rank$rank, total = bc_rank$total),
  aes(x = rank, y = total)
) +
  geom_line() +
  geom_hline(
    yintercept = metadata(bc_rank)$knee,
    color = "blue", linetype = 2
  ) +
  geom_hline(
    yintercept = metadata(bc_rank)$inflection,
    color = "green", linetype = 2
  ) +
  annotate(
    "text",
    x = 1000,
    # Scale by 1.5 so each label sits just above its line on the log axis.
    y = 1.5 * c(metadata(bc_rank)$knee, metadata(bc_rank)$inflection),
    label = c("knee", "inflection"),
    color = c("blue", "green")
  ) +
  scale_x_log10() +
  scale_y_log10() +
  labs(x = "Rank", y = "Total UMI counts") +
  theme_bw()

## --------------------------------------------------------------------------
# Keep only the columns whose total count exceeds the inflection threshold.
# drop = FALSE keeps the result two-dimensional even when exactly one column
# passes; without it the subset collapses to a plain vector and dim() below
# would return NULL.
tcc <- tcc[
  ,
  tot_counts > metadata(bc_rank)$inflection,
  drop = FALSE
]
dim(tcc)

## --------------------------------------------------------------------------
# Print the R session details for this run.
utils::sessionInfo()

