8 Single-cell transcriptomic data

For use with single-cell transcriptomic data, some functions are prepared. The example data used here is for the demonstrative purpose and the necessary preprocessing should be performed.

8.1 Visualizing textual information for marker genes

Visualize the network for all the cluster’s marker genes.

library(Seurat)
library(patchwork)
library(ggraph)
library(biotextgraph)
library(ggsc) ## Install using BiocManager::install("ggsc")
dir = "filtered_gene_bc_matrices/hg19"
pbmc.data <- Read10X(data.dir = dir)
pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k",
                           min.cells=3, min.features=200)
pbmc <- NormalizeData(pbmc)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst")
pbmc <- ScaleData(pbmc, features = row.names(pbmc))
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
pbmc <- FindNeighbors(pbmc, dims = 1:10, verbose = FALSE)
pbmc <- FindClusters(pbmc, resolution = 0.5, verbose = FALSE)
pbmc <- RunTSNE(pbmc, reduction.use = "cca.aligned", dims.use = 1:20, do.fast = TRUE)
DimPlot(pbmc, reduction = "pca",
        label = TRUE, pt.size = 0.5) 
## Or, use ggsc (ggsc, https://yulab-smu.top/ggsc/)
sc_dim(pbmc) + 
    sc_dim_geom_label(geom = shadowtext::geom_shadowtext, 
            color='black', bg.color='white')

TextMarkers function can be used for Seurat results. pvalThresh can be specified.

markers <- FindAllMarkers(pbmc)
texts <- TextMarkers(markers, type="network", pvalThresh=1e-50)
#> 0
#> Input genes: 75
#>   Converted input genes: 75
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> Ignoring corThresh, automatically determine the value
#> threshold = 0.514
#> 1
#> Input genes: 197
#>   Converted input genes: 190
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> Ignoring corThresh, automatically determine the value
#> threshold = 0.102
#> 2
#> Input genes: 8
#>   Converted input genes: 8
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> Ignoring corThresh, automatically determine the value
#> threshold = 0.323
#> 3
#> Input genes: 64
#>   Converted input genes: 60
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> Ignoring corThresh, automatically determine the value
#> threshold = 0.101
#> 4
#> Input genes: 16
#>   Converted input genes: 16
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> Ignoring corThresh, automatically determine the value
#> threshold = 0.2
#> 5
#> Input genes: 88
#>   Converted input genes: 82
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> Ignoring corThresh, automatically determine the value
#> threshold = 0.201
#> 6
#> Input genes: 40
#>   Converted input genes: 39
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> Ignoring corThresh, automatically determine the value
#> threshold = 0.1
#> 7
#> Input genes: 9
#>   Converted input genes: 8
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> Ignoring corThresh, automatically determine the value
#> threshold = 0.273
#> 8
#> Input genes: 63
#>   Converted input genes: 52
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> Ignoring corThresh, automatically determine the value
#> threshold = 0.102
wrap_plots(texts, nrow=3)

Word clouds can also be shown.

texts <- TextMarkers(subset(markers, markers$cluster %in% c("6","8")),
     type="wc", pvalThresh=1e-50, genePlot=FALSE)
#> 6
#> Input genes: 40
#>   Converted input genes: 39
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 8
#> Input genes: 63
#>   Converted input genes: 52
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
wrap_plots(texts)

8.2 findMarkers() in scran

library(SingleCellExperiment)
library(scater)
library(scran)
sce <- as.SingleCellExperiment(pbmc)
colLabels(sce) <- colData(sce)$ident
colData(sce)$label <- colData(sce)$ident

The function TextMarkersScran performs text fetching recursively for each cluster.

marker.info <- findMarkers(sce, colLabels(sce))
texts <- marker.info |> TextMarkersScran(top=5)
#> 0
#> Input genes: 28
#>   Converted input genes: 28
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 1
#> Input genes: 20
#>   Converted input genes: 20
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 2
#> Input genes: 27
#>   Converted input genes: 27
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 3
#> Input genes: 22
#>   Converted input genes: 22
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 4
#> Input genes: 19
#>   Converted input genes: 19
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 5
#> Input genes: 18
#>   Converted input genes: 16
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 6
#> Input genes: 20
#>   Converted input genes: 20
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 7
#> Input genes: 26
#>   Converted input genes: 25
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 8
#> Input genes: 21
#>   Converted input genes: 20
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
texts$`6`

8.3 Map resulting wordcloud plots on the dimensionality reducing plot.

Single-cell level wordclouds can be plotted on the reduced dimension plot of cells. The title in the wordcloud or network can be suppressed by withTitle=FALSE in TextMarkers functions. A function is prepared to generate colors and show word cloud on the reduced dimension plot using the same color as the original plot (plotReducedDimWithTexts). This function can use use_shadowtext in ggwordcloud::geom_text_wordcloud, which is available from the forked repository (noriakis/ggwordcloud). bg.colour is used to specify background color. Otherwise, one can specify ggfx filtering functions to withggfx and ggfxParams as parameters, which reflects to wordcloud.

For SingleCellExperiment,

library(ggfx)
candLabel <- c("0","6")

plotReducedDimWithTexts(sce, marker.info, colour_by="label",point_alpha=0.3,
    use_shadowtext=TRUE, which.label=candLabel)
#> 0
#> Input genes: 28
#>   Converted input genes: 28
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 6
#> Input genes: 20
#>   Converted input genes: 20
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)

## Change the bg.colour
plotReducedDimWithTexts(sce, marker.info, colour_by="label",point_alpha=0.3,
    use_shadowtext=TRUE, bg.colour="tomato",
    which.label=candLabel)
#> 0
#> Input genes: 28
#>   Converted input genes: 28
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 6
#> Input genes: 20
#>   Converted input genes: 20
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)

## Use `with_outer_glow`
plotReducedDimWithTexts(sce, marker.info, colour_by="label",point_alpha=0.3,
    use_shadowtext=FALSE, withggfx="with_outer_glow",
    which.label=candLabel)
#> 0
#> Input genes: 28
#>   Converted input genes: 28
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 6
#> Input genes: 20
#>   Converted input genes: 20
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)

## Use `with_outer_glow` with parameters
## Should only be used for just the visualization purpose
plotReducedDimWithTexts(sce, marker.info, colour_by="label",point_alpha=0.3,
    use_shadowtext=FALSE, withggfx="with_outer_glow", dimred="TSNE",
    ggfxParams=list(colour="white", expand=2),
    which.label=c("2","3"))+
    theme(axis.line=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        legend.position="none",
        panel.background=element_blank(),
        panel.border=element_blank(),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        plot.background=element_blank())
#> 2
#> Input genes: 27
#>   Converted input genes: 27
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 3
#> Input genes: 22
#>   Converted input genes: 22
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)


## Change the font
## Use alien encounter fonts (http://www.hipsthetic.com/alien-encounters-free-80s-font-family/)
sysfonts::font_add(family="alien",regular="SFAlienEncounters.ttf")
showtext::showtext_auto()
plotReducedDimWithTexts(sce, marker.info, colour_by="label",point_alpha=0.3,
                        use_shadowtext=TRUE, bg.colour="white", args=list(fontFamily="alien"),
                        which.label=c("6"), wcScale=6)
#> 6
#> Input genes: 20
#>   Converted input genes: 20
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)

For the Seurat object, use DimPlotWithTexts().

r <- rep(4, length(unique(markers$cluster)))
names(r) <- unique(markers$cluster)
DimPlotWithTexts(pbmc, markers, label=FALSE, rad=r, which.label=c("1","4"), wcScale=6)
#> 1
#> Input genes: 1027
#>   Converted input genes: 966
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 4
#> Input genes: 316
#>   Converted input genes: 298
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)

Give the function a data frame that is already filtered, like thersholding by adjusted p-values.

r <- rep(2, length(unique(markers$cluster)))
names(r) <- unique(markers$cluster)
thresh <- subset(markers, markers$p_val_adj<1e-50)
DimPlotWithTexts(pbmc,
    thresh,
    label=FALSE,
    rad=r,
    which.label=c("2","4","6"),
    args=list(numWords=50,scaleFreq=5))
#> 2
#> Input genes: 8
#>   Converted input genes: 8
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 4
#> Input genes: 16
#>   Converted input genes: 16
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 6
#> Input genes: 40
#>   Converted input genes: 39
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)

geom_sc_wordcloud is prepared to use in conjuction with ggsc.

sc_dim(pbmc,reduction = "pca")+
  geom_sc_wordcloud(markers, show_markers = c(3,5),
                    p_val_adj_threshold = 1e-50, wcScale=7)+
  sc_dim_geom_label(geom = shadowtext::geom_shadowtext, 
                  color='black', bg.color='white')
#> 3
#> Input genes: 64
#>   Converted input genes: 60
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 5
#> Input genes: 88
#>   Converted input genes: 82
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)

Some positioning arguments are prepared.

sc_dim(pbmc,reduction = "pca")+
  geom_sc_wordcloud(markers, show_markers = c(4,7),
    p_val_adj_threshold = 1e-50,
    wcScale=7,
    base_dens=TRUE)
#> 4
#> Input genes: 16
#>   Converted input genes: 16
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 7
#> Input genes: 9
#>   Converted input genes: 8
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)

8.4 Plotting marker gene names in plot

You can plot gene names instead of gene description by specifying gene_name=TRUE in geom_sc_wordcloud(). Number of genes can be controlled with geneNum. The genes in each cluster are ordered by sortBy, in decreasing order in default (decreasing option).

sc_dim(pbmc,reduction = "pca")+
    geom_sc_wordcloud(markers, show_markers = c(0,5),
        gene_name = TRUE, wcScale=7,
        p_val_adj_threshold = 1e-50, base_dens = TRUE)

sc_dim(pbmc,reduction = "pca")+
    geom_sc_wordcloud(markers, show_markers = c(0,5),
        gene_name = TRUE, wcScale=7, use_shadowtext=FALSE,
        withggfx="with_outer_glow", ggfxParams=list(colour="white", expand=1.5),
        p_val_adj_threshold = 1e-50, base_dens = TRUE)

Scaling color should be called before geom_sc_wordcloud().

sc_dim(pbmc,reduction = "pca")+
    scale_color_manual(values=RColorBrewer::brewer.pal(9, "PuOr"))+
    geom_sc_wordcloud(markers, show_markers = c(0,5),
        gene_name = TRUE, wcScale=7,
        p_val_adj_threshold = 1e-50, base_dens = TRUE)

8.4.1 Plotting the enrichment analysis results of marker genes

r <- rep(3, length(unique(markers$cluster)))
names(r) <- unique(markers$cluster)

## Enrichment analysis
DimPlotWithTexts(pbmc, thresh, label=FALSE,
    point_alpha = 0.8,which.label = c(0,6),
    args=list(enrich="kegg", scaleFreq=10, numWords=50),
    reduction = "pca", base_dens=TRUE,
    wcScale=7)
#> 0
#> Input genes: 75
#>   Converted input genes: 75
#> Performing enrichment analysis
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> 6
#> Input genes: 40
#>   Converted input genes: 39
#> Performing enrichment analysis
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)