5 Interactive inspection of gene cluster network annotated by words

library(biotextgraph)
library(ggplot2)
library(ggraph)
library(org.Hs.eg.db)
library(clusterProfiler)
library(RColorBrewer)

In this example, a Bayesian network showing the module eigengenes relationship are inferred using boot.strength function in bnlearn from the weighted gene correlation network analysis (WGCNA) results. The modules are annotated by word clouds produced by refseq(), and can be exported to the format of Cytoscape.js or vis.js. In this way, module relationship can be interactively inspected with the functional implications. The other functions like pubmed() can be used, however, you shold specify API keys for the function makes multiple queries.

## In this example, we simulate WGCNA results.
## you can just use results from WGCNA.
## Assuming WGCNA results are stored in `mod`
mod <- biotextgraph::returnExample()
MEs <- mod$MEs
modColors <- mod$colors
ensg <- names(modColors)

# library(bnlearn)
library(igraph)

## Replace like boot.strength(mod$MEs, R=500, algorithm = "hc")
# dag <- model2network("[ME1][ME2|ME1]") # If using bnlearn
g <- graph_from_literal( ME1-+ME2, ME1-+ME3 )

## Convert to igraph
# g <- as.igraph(dag)

## Assign edge attributes
## Skip, if you perform boot.strength, the edge attributes can be added from the result
# el <- data.frame(as_edgelist(g))
# colnames(el) <- c("from","to")
# el <- left_join(el, bs)
# E(g)$strength <- el$strength
# E(g)$direction <- el$direction

## Node attributes
V(g)$stripName <- gsub("ME","",V(g)$name)
sizes <- table(modColors)
V(g)$size <- as.numeric(sizes[V(g)$stripName])

## Directory to save images and a script
rootDir <- "./"
netDir <- "visCyjs"
imageDir <- "images"

dir.create(paste0(rootDir, "/", netDir))
dir.create(paste0(rootDir, netDir, "/", imageDir))

images <- c()
plotType <- "bar"
numLim <- 200 # limit for gene number
for (i in V(g)$name){
    print(i)
    i <- as.numeric(gsub("ME","",i)) # strip ME

    queries <- ensg[modColors==i]
    if (length(queries)>numLim) {
        warning("Sampling random genes")
        queries <- queries[sample(1:length(queries), numLim)] ## Temporary restrict to randomly chosen genes, should be replaced to like kME values
    }
    
    ## Convert to ENTREZ
    entre <- AnnotationDbi::select(org.Hs.eg.db, keytype="ENSEMBL",
        keys = queries, columns = "ENTREZID")$ENTREZID
    
    if (plotType=="bar"){
        plt <- makeBar(entre, keyType="ENTREZID") # get barplot
    } else { ## If wordcloud
        # A <- refseq(entre, keyType="ENTREZID",
        #                    argList=list(rot.per=0.4,
        #                                 colors=brewer.pal(10,
        #                                                   sample(row.names(RColorBrewer::brewer.pal.info), 1)),
        #                                 random.order=FALSE),
        #                    numWords=80)
        # # plt <- plotWC(A)
        # # 
        # # ## This time use ggwordcloud()
        # plt <- ggwordcloud::ggwordcloud(getSlot(A, "freqDf")$word, getSlot(A, "freqDf")$freq,
        #                      shape="circle", min.freq = 1,max.words = Inf,
        #                      rot.per = 0.5, random.order = FALSE,
        #                      colors = brewer.pal(10,
        #                                          sample(row.names(RColorBrewer::brewer.pal.info), 1)))+
        #          scale_size_area(max_size = 40)
    }
    ## Save images
    ggsave(paste0(rootDir, netDir, "/", imageDir, "/", i ,".png"),
           plt, dpi=300, width=10, height=10)
    ## Store image dir
    images <- c(images, paste0(imageDir, "/", i ,".png"))
}
#> [1] "ME1"
#> Input genes: 12
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> [1] "ME2"
#> Input genes: 13
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> [1] "ME3"
#> Input genes: 7
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
V(g)$image <- images

## Node shape
if (plotType=="bar"){
    V(g)$shape <- rep("rectangle", length(V(g))) 
} else {
    V(g)$shape <- rep("circle", length(V(g)))
}

## Scale the node size
sizeMin <- 50
sizeMax <- 200
rawMin <- min(V(g)$size)
rawMax <- max(V(g)$size)
scf <- (sizeMax-sizeMin)/(rawMax-rawMin)
V(g)$size <- scf * V(g)$size + sizeMin - scf * rawMin

## Export
exportCyjs(g, rootDir, netDir)
# or, exportVisjs(g, rootDir, netDir)

Use like http-server in the directory containing a exported JavaScript, and interactively inspect the module relationship with word information. The example visualization is shown below (not by the code above, but Bayesian network of module eigenes inferred from RNA-seq dataset ofbladder cancer).

Example visualization of a Bayesian network

Interactive inspection is possible using GitHub pages or the other hosting services like below.

knitr::include_url("https://noriakis.github.io/cyjs_test/wordcloud")

If you specify node attribute named group, and set bubble=TRUE in exportCyjs function, bubble sets will be plotted using cytoscape.js-bubblesets, useful for inspecting the similarity betwteen the gene cluster, like the output of pvclust and pvpick on module eigengenes.

V(g)$group <- c(1, 1, NA)
# exportCyjs(g, rootDir, netDir, bubble=TRUE)

## Example, not by the code above.
knitr::include_url("https://noriakis.github.io/cyjs_test/wordcloud_bubble")

vis.js can be used, by exporting function exportVisjs. In this example, the barplot of words are shown in the nodes.

knitr::include_url("https://noriakis.github.io/cyjs_test/visjs")

5.1 Wrapper function for wordcloud network

The network like the previous example can be conveniently exported using exportWCNetwork function, which wrapped the previous code. The input is igraph and named gene list.

mod <- biotextgraph::returnExample()
#> 'select()' returned 1:many mapping between keys and
#> columns
#> 'select()' returned 1:1 mapping between keys and
#> columns
#> 'select()' returned 1:1 mapping between keys and
#> columns
g <- graph_from_literal( ME1-+ME2, ME1-+ME3 )
geneList <- list("ME1"=mod$colors[mod$colors==1] |> names(),
     "ME2"=mod$colors[mod$colors==2] |> names(),
     "ME3"=mod$colors[mod$colors==3] |> names())
g
#> IGRAPH 7dedebd DN-- 3 2 -- 
#> + attr: name (v/c)
#> + edges from 7dedebd (vertex names):
#> [1] ME1->ME2 ME1->ME3
geneList
#> $ME1
#>  [1] "ENSG00000108702" "ENSG00000108691" "ENSG00000277632"
#>  [4] "ENSG00000278567" "ENSG00000274221" "ENSG00000275302"
#>  [7] "ENSG00000277943" "ENSG00000275824" "ENSG00000271503"
#> [10] "ENSG00000274233" "ENSG00000108688" "ENSG00000108700"
#> 
#> $ME2
#>  [1] "ENSG00000163739" "ENSG00000081041" "ENSG00000163734"
#>  [4] "ENSG00000163735" "ENSG00000124875" "ENSG00000169429"
#>  [7] "ENSG00000138755" "ENSG00000169245" "ENSG00000169248"
#> [10] "ENSG00000107562" "ENSG00000156234" "ENSG00000145824"
#> [13] "ENSG00000161921"
#> 
#> $ME3
#> [1] "ENSG00000012061" "ENSG00000104884" "ENSG00000163161"
#> [4] "ENSG00000175595" "ENSG00000134899" "ENSG00000225830"
#> [7] "ENSG00000049167"
exportWCNetwork(g,geneList,keyType="ENSEMBL", wcScale=50)
#> Warning in brewer.pal(10, sample(row.names(RColorBrewer::brewer.pal.info), : n too large, allowed maximum for palette Oranges is 9
#> Returning the palette you asked for with that many colors
#> Warning in brewer.pal(10, sample(row.names(RColorBrewer::brewer.pal.info), : n too large, allowed maximum for palette Blues is 9
#> Returning the palette you asked for with that many colors
#> Warning in dir.create(paste0(dir)): 'network' already
#> exists
#> Warning in dir.create(paste0(dir, "/images")):
#> 'network\images' already exists
#> Input genes: 12
#> 'select()' returned 1:1 mapping between keys and
#> columns
#>   Converted input genes: 7
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> Scale for size is already present.
#> Adding another scale for size, which will replace the
#> existing scale.
#> Warning in wordcloud_boxes(data_points =
#> points_valid_first, boxes = boxes, : Some words could not
#> fit on page. They have been removed.
#> Input genes: 13
#> 'select()' returned 1:1 mapping between keys and
#> columns
#>   Converted input genes: 13
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> Scale for size is already present.
#> Adding another scale for size, which will replace the
#> existing scale.
#> Warning in wordcloud_boxes(data_points =
#> points_valid_first, boxes = boxes, : Some words could not
#> fit on page. They have been removed.
#> Input genes: 7
#> 'select()' returned 1:1 mapping between keys and
#> columns
#>   Converted input genes: 7
#> Filter based on GeneSummary
#> Filtered 77 words (frequency and/or tfidf)
#> Scale for size is already present.
#> Adding another scale for size, which will replace the
#> existing scale.
#> Warning in wordcloud_boxes(data_points =
#> points_valid_first, boxes = boxes, : One word could not fit
#> on page. It has been removed.

4 The other usage for microbiome-related data

6 Annotating gene cluster dendrogram