3 Module

Module information can be obtained and parsed. Parsing DEFINITION and REACTION is supported. For the definition, first the function breaks down the definition to blocks, and make graphical representation using ggraph and tbl_graph or text itself using geom_text and geom_rect. By calling module function, kegg_module class object is created.

library(ggkegg)
library(tidygraph)
library(dplyr)
mod <- module("M00004")
mod
#> M00004
#> Pentose phosphate pathway (Pentose phosphate cycle)

The module function creates an object of the kegg_module class, which stores the parsed information of reactions and definitions in its internal slots. By providing this kegg_module object to various functions, it is possible to execute various operations related to the module.

Visualizing the reactions in the module. Please report any reaction that cannot be properly parsed this way.

library(igraph)
mod <- module("M00004")
## Obtain reaction graph
reacg <- attr(mod, "reaction_graph") # or, get_module_attribute()
## Some edges are duplicate and have different reactions,
## so simplify
reacg |>
    convert(to_simple) |>
    activate(edges) |> 
    mutate(reaction=lapply(.orig_data,
        function(x) 
            paste0(unique(x[["reaction"]]),
                collapse=","))) |>
    ggraph()+
        geom_node_point()+
        geom_edge_parallel(
            aes(label=reaction), angle_calc = "along",
            label_dodge = unit(5,"mm"),
            label_colour = "tomato",
            arrow = arrow(length = unit(1, 'mm')),
            end_cap = circle(5, 'mm'),
            start_cap = circle(5, "mm"))+
        geom_node_text(aes(label=name), repel=TRUE,
            bg.colour="white", size=4)+
        theme_void()

3.1 Module definition in graph

We can visualize the module definition in the graph. This will make grouping nodes and nodes leading to the nodes connected with AND argument (‘+’ and ’ ’), and connect the definition. In the example below, the wrapper function plot_module_blocks() visualizes the definitions of each module. Red represents AND relationships, while other relationships (such as compound or OR) are expressed as edge labels.

module("M00009") |>
  obtain_sequential_module_definition() |> ## return tbl_graph
  plot_module_blocks() ## wrapper function

3.2 Module definition in text

We can also visualize the module definition in text.

module("M00004") |>
  module_text() |> ## return data.frame
  plot_module_text() ## wrapper function

3.3 Module completeness

Given a vector of interesting KOs, module completeness can be calculated using boolean expression.

mod <- module("M00009")
query <- sample(attr(mod, "definition_components"), 5) |>
  strsplit(":") |>
  sapply("[",2)
query
#> [1] "K00164" "K05942" "K00658" "K00024" "K18860"
mod |>
  module_completeness(query) |>
  kableExtra::kable()
block all_num present_num ratio complete
(K01647,K05942) 2 1 0.5000000 TRUE
(K01681,K01682) 2 0 0.0000000 FALSE
(K00031,K00030) 2 0 0.0000000 FALSE
((K00164+K00658,K01616)+K00382,K00174+K00175-K00177-K00176) 8 2 0.2500000 FALSE
(K01902+K01903,K01899+K01900,K18118) 5 0 0.0000000 FALSE
(K00234+K00235+K00236+(K00237,K25801),K00239+K00240+K00241-(K00242,K18859,K18860),K00244+K00245+K00246-K00247) 15 1 0.0666667 FALSE
(K01676,K01679,K01677+K01678) 4 0 0.0000000 FALSE
(K00026,K00025,K00024,K00116) 4 1 0.2500000 TRUE

3.3.1 Assessing module completeness across multiple microbial genomes

For instance, we can assess completeness from KOs inferred from multiple species genome. Here, we mapped EC numbers available in the annotation file in MIDAS pipeline, which is obtained from PATRIC server, to KOs and calculate completeness for randomly obtained species.

## Load pre-computed KOs, and recursively perform completeness calculation.
mf <- list.files("../")
mf <- mf[startsWith(mf, "M")]
annos <- list()

candspid <- list.files("../species_dir")
candspid <- sample(candspid, 10)

## Obtain EC to KO mapping file from KEGG REST API
mapper <- data.table::fread("https://rest.kegg.jp/link/ec/ko", header=FALSE)

suppressMessages(
  for (i in candspid) {
    mcs <- NULL
    df <- read.table(paste0("../species_dir/",i), sep="\t", header=1)
    fid <- paste0("ec:",df[df$ontology=="ec",]$function_id)
    kos <- mapper[mapper$V2 %in% fid,]$V1 |> strsplit(":") |> sapply("[",2) |> unique()
    for (mid in mf) {
      mc <- module_completeness(module(mid, directory="../"),
                                query = kos)
      mcs <- c(mcs, mc$complete |> mean()) ## Mean of blocks
    }
    annos[[as.character(i)]] <- mcs
  }
)

We will next visualize the results using ComplexHeatmap and simplifyEnrichment. We will plot the word cloud of module description alongside heatmap by simplifyEnrichment, for determined clusters.

library(ComplexHeatmap)

## Make data.frame
hdf <- data.frame(annos, check.names=FALSE)
row.names(hdf) <- mf
hdf[is.na(hdf)] <- 0
hdf <- hdf[apply(hdf, 1, sum)!=0,]

## Prepare for word cloud annotation
moddesc <- data.table::fread("https://rest.kegg.jp/list/module", header=FALSE)

## Obtain K-means clustering
km = kmeans(hdf, centers = 10)$cluster

gene_list <- split(row.names(hdf), km)
gene_list <- lapply(gene_list, function(x) {
  x[!is.na(x)]
})

annotList <- list()
for (i in names(gene_list)) {
  maps <- (moddesc |> dplyr::filter(V1 %in% gene_list[[i]]))$V2
  annotList[[i]] <-  maps
}

col_fun = circlize::colorRamp2(c(0, 0.5, 1),
                               c(scales::muted("blue"), "white", scales::muted("red")))

ht1 <- Heatmap(hdf, show_column_names = TRUE,
               col=col_fun, row_split=km,
               heatmap_legend_param = list(
                 legend_direction = "horizontal", 
                 legend_width = unit(5, "cm")
               ),
               rect_gp = gpar(col = "white", lwd = 2),
               name="Module completeness", border=TRUE,
               column_names_max_height =unit(10,"cm"))+
  rowAnnotation(
    keywords = simplifyEnrichment::anno_word_cloud(align_to = km,
                                                   term=annotList,
                                                   exclude_words=c("pathway","degradation",
                                                                   "biosynthesis"),
                                                   max_words = 40,fontsize_range = c(5,20))
  )
ht1

3.4 Module abundance and pathway abundance calculation

vec <- c(0.1, 0.5)
names(vec) <- c("K00234","K01676")
module_abundance("M00009", vec)
#> [1] 0.3947368

3.5 Visualize the result of enricher

If you performed some experiments involving KEGG Orthology, and performed enrichment analysis on KO to module relationship, ggkegg function accepts the results and plot text-based or network-based graph where KOs are highlighted.

library(BiocFileCache)
#> Loading required package: dbplyr
#> 
#> Attaching package: 'dbplyr'
#> The following objects are masked from 'package:dplyr':
#> 
#>     ident, sql
library(clusterProfiler)
#> clusterProfiler v4.12.2 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
#> 
#> Please cite:
#> 
#> Guangchuang Yu, Li-Gen Wang, Yanyan Han and Qing-Yu
#> He. clusterProfiler: an R package for comparing
#> biological themes among gene clusters. OMICS: A
#> Journal of Integrative Biology 2012, 16(5):284-287
#> 
#> Attaching package: 'clusterProfiler'
#> The following object is masked from 'package:igraph':
#> 
#>     simplify
#> The following object is masked from 'package:stats':
#> 
#>     filter

## Download and cache KO to module relationship
url <- paste0("https://rest.kegg.jp/link/ko/module")
bfc <- BiocFileCache()
path <- bfcrpath(bfc, url)
module.bg <- apply(data.table::fread(path), 2, function(x) sapply(strsplit(x, ":"), "[",2))|> data.frame()|>`colnames<-`(c("term","gene"))

## Using the table, perform enrichment analysis.
mod.enrich <- enricher(c("K00431","K00832"), TERM2GENE=module.bg)

## Visualize using ggkegg
dd <- ggkegg(mod.enrich)
dd