Chapter 7 Comparing networks
7.1 Comparing multiple networks
The inference including hundreds of genes should be interpreted with caution especially for the low bootstrap number. For assessing the network stability, the users can compare and output F-measure by the function compareBNs, which accepts the list of multiple networks (bn) and output the pairwise F-measures. In this analysis, We perform the inference five times for 100 and 500 bootstrap numbers in the specific reactome.
fvres <- c()
cl <- parallel::makeCluster(12)
for (R in c(200,500)){
  n1 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
  n2 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
  n3 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
  n4 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
  n5 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
  geneNum <- length(n1$av$nodes)
  
  nets <- list(n1$av, n2$av, n3$av, n4$av, n5$av)
  igList <- list(as.igraph(n1$av), as.igraph(n2$av), as.igraph(n3$av), as.igraph(n4$av), as.igraph(n5$av))
  allnets <- nets
  fvalues <- compareBNs(nets)
  fvres <- rbind(fvres, c("TCGA", 13, geneNum, R, "default", mean(fvalues), sd(fvalues)))
  
  ## Threshold 0.8
  n1av <- averaged.network(n1$str, threshold=0.8)
  n2av <- averaged.network(n2$str, threshold=0.8)
  n3av <- averaged.network(n3$str, threshold=0.8)
  n4av <- averaged.network(n4$str, threshold=0.8)
  n5av <- averaged.network(n5$str, threshold=0.8)
  
  nets <- list(n1av, n2av, n3av, n4av, n5av)
  fvalues <- compareBNs(nets)
  fvres <- rbind(fvres, c("TCGA", 13, geneNum, R, 0.8, mean(fvalues), sd(fvalues)))
}
resDF <- data.frame(fvres)
colnames(resDF) <- c("dataset","path_number","gene_number","R","threshold","mean","sd")
pathway_name <- sapply(resDF$path_number, function(x) pway@result$Description[as.numeric(x)])
resDF$label <- paste0(resDF$gene_number, "\n(", stringr::str_wrap(pathway_name,10),")")
parallel::stopCluster(cl)
kable(resDF)| dataset | path_number | gene_number | R | threshold | mean | sd | label | 
|---|---|---|---|---|---|---|---|
| TCGA | 13 | 48 | 200 | default | 0.879001362810166 | 0.0268588783377082 | 48 (RHO GTPases Activate Formins) | 
| TCGA | 13 | 48 | 200 | 0.8 | 0.910779196545874 | 0.0154934340979147 | 48 (RHO GTPases Activate Formins) | 
| TCGA | 13 | 48 | 500 | default | 0.910205528150544 | 0.00830851399185569 | 48 (RHO GTPases Activate Formins) | 
| TCGA | 13 | 48 | 500 | 0.8 | 0.936679898625571 | 0.0195192542150308 | 48 (RHO GTPases Activate Formins) | 
Plot the result for the visual assessment.
defa <- resDF %>% filter(threshold=="default") %>%
    ggplot(aes(x=label,
               y=as.numeric(mean),
               group=dataset,
               color=dataset,
               bg.color=dataset,
               segment.color=dataset)
           )+
    geom_jitter(size=4, width=0.1) +
    theme_minimal() +
    geom_text_repel(aes(label = R),
                     color = "white", size=4,
                     bg.r = .15) + 
    scale_color_brewer(palette = "Set1",
                         name = "Dataset",
                         # The same color scall will apply to both of these aesthetics.
                         aesthetics = c("color","bg.color","segment.color"))+
    geom_hline(yintercept=0.8, lty=3, color="red", size=1) +
        xlab("Gene number") + ylab("F-measure") + ggtitle("Default strength threshold")
high <- resDF %>% filter(threshold=="0.8") %>%
    ggplot(aes(x=label,
               y=as.numeric(mean),
               group=dataset,
               color=dataset,
               bg.color=dataset,
               segment.color=dataset)
    )+
    geom_jitter(size=4, width=0.1) +
    theme_minimal() +
    
    geom_text_repel(aes(label = R),
                    color = "white", size=4,
                    bg.r = .15) + 
    scale_color_brewer(palette = "Set1",
                       name = "Dataset",
                       # The same color scall will apply to both of these aesthetics.
                       aesthetics = c("color","bg.color","segment.color"))+
    geom_hline(yintercept=0.8, lty=3, color="red", size=1) +
    xlab("Gene number") + ylab("F-measure") + ggtitle("Strength threshold = 0.8")
defa / high
Take the intersection of networks.
as.bn(Reduce(igraph::intersection, igList))FALSE 
FALSE   Random/Generated Bayesian network
FALSE 
FALSE   model:
FALSE    [NUP107][CDC20][KNL1][KIF2C|CDC20][BUB1B|KNL1][BIRC5|CDC20:KIF2C][CENPF|KNL1:KIF2C:BUB1B]
FALSE    [CDCA8|CDC20:KIF2C][ZWILCH|KNL1:BUB1B][CENPA|BIRC5:CDCA8:KIF2C][CENPE|CENPF:KNL1:BUB1B]
FALSE    [BUB1|CENPF:KIF2C:BUB1B][AURKB|BIRC5:CDC20][CENPI|CDCA8:KNL1:CENPE][KIF18A|CENPE:AURKB]
FALSE    [CENPK|CENPE:BUB1B][NUP85|BIRC5:CENPE][SPC25|BIRC5:AURKB][MAD2L1|BIRC5:CENPE:ZWILCH]
FALSE    [SPDL1|CENPK:BUB1B][DSN1|NUP85:KIF2C][SPC24|BIRC5:KIF2C:SPC25:AURKB]
FALSE    [SGO2|KIF18A:KNL1:BUB1:AURKB][PLK1|BIRC5:CENPA:CDC20:SPC25][TUBA1C|CDC20:SPC25]
FALSE    [NUP133|CENPF:CENPE:SPC24][SRF|TUBA1C][SGO1|TUBA1C:BUB1][CENPU|DSN1:MAD2L1]
FALSE    [CENPH|BIRC5:CENPA:CENPK:SPC25:SPC24:MAD2L1:ZWILCH]
FALSE    [KNTC1|BIRC5:CENPK:NUP85:KNL1:CENPE:DSN1][NUP37|CENPF:CENPH:TUBA1C:ZWILCH:KNTC1]
FALSE    [CENPO|CENPA:NUP85:CENPU:MAD2L1][RHOB|SRF:SGO1][INCENP|CDCA8:KNL1:CENPH:SPC24:MAD2L1:BUB1]
FALSE    [SKA1|BIRC5:SGO1:SPC24:BUB1:ZWILCH][CKAP5|NUP133:KIF18A:DSN1]
FALSE    [NDC80|CENPA:CENPK:SKA1:SPC24:MAD2L1:AURKB][XPO1|CENPO:CENPE:SPC24:SGO2]
FALSE    [NUF2|CENPA:CENPF:SGO1:SKA1:TUBA1C:AURKB][BUB3|NUP37][ERCC6L|CENPI:SRF:SGO1:SKA1:TUBA1C]
FALSE    [CENPQ|NUP37:NDC80:CENPI:MAD2L1][ZWINT|NDC80:SGO1:DSN1][DIAPH3|NDC80:CENPI:SRF:SGO2]
FALSE    [AHCTF1|NDC80:BIRC5:CENPF:CENPE][CENPL|XPO1:CENPO:NUF2:DSN1:AHCTF1:CKAP5]
FALSE    [RCC2|NUP133:CENPI:CENPF:CDCA8:DIAPH3:CENPU:BUB1]
FALSE   nodes:                                 48 
FALSE   arcs:                                  150 
FALSE     undirected arcs:                     0 
FALSE     directed arcs:                       150 
FALSE   average markov blanket size:           12.42 
FALSE   average neighbourhood size:            6.25 
FALSE   average branching factor:              3.12 
FALSE 
FALSE   generation algorithm:                  Empty
The same can be done for bnpathplot.
cl <- parallel::makeCluster(12)
n1 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
n2 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
n3 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
n4 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
n5 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
nets <- list(n1$av, n2$av, n3$av, n4$av, n5$av)
fvalues <- compareBNs(nets)
mean(fvalues)FALSE [1] 0.9752001
parallel::stopCluster(cl)7.2 Testing R
We provide the diagnostic function of inferred network across multiple bootstrap numbers. We can test how R values affect the resulting network. It is very time consuming to test many genes, thus the setting up cl argument using library parallel is suggested. We can specify which scoring function to use by scoreType. Additionally, returned strength data frame as well as raw data frame can be used to assess various metrics and thresholds like structural hamming distance.
cl <- parallel::makeCluster(6)
pathTest <- bnpathtest(results = pway, algo="hc",
           exp = vsted, Rrange = seq(10, 300, 10), nCategory = 15,
           expSample = incSample,
           expRow = "ENSEMBL", cl = cl)
# Plot
difEdges <- sapply(pathTest$graph, function(x) sapply(pathTest$graph, function(y) length(E(difference(as.igraph(x), as.igraph(y))))))
difEdges <- data.frame(difEdges[dim(difEdges)[1], colnames(difEdges)])
colnames(difEdges) <- c("NumDif")
ggplot(difEdges, aes(x=as.numeric(substring(rownames(difEdges), 2)), y=NumDif)) +
  geom_line(group=1)+
  geom_point(aes(color=NumDif), size=5)+
  scale_color_gradient(low = "blue", high = "red", name="Number of different edges", guide = guide_legend())+
  theme_bw()+xlab("R")+ylab("Number of different edges")+
  theme(legend.position="bottom")
parallel::stopCluster(cl)## Which pathway?
paste(pway@result[15,]$Description, pway@result[15,]$Count)FALSE [1] "Homologous DNA Pairing and Strand Exchange 22"
## From 10 to 200
cl = parallel::makeCluster(6)
geneTest <- bngenetest(results = pway, algo="hc", pathNum=15,
           exp = vsted, Rrange=seq(10, 200, 10),
           expSample = incSample,
           expRow = "ENSEMBL", cl = cl)
difSHD <- sapply(geneTest$graph, function(x) sapply(geneTest$graph, function(y) shd(x, y)))
# Plot SHD compared to the highest R network
difSHD <- data.frame(difSHD[dim(difSHD)[1], colnames(difSHD)])
colnames(difSHD) <- c("SHD")
ggplot(difSHD, aes(x=as.numeric(substring(rownames(difSHD), 2)), y=SHD)) +
  geom_line(group=1)+
  geom_point(aes(color=SHD), size=5)+
  scale_color_gradient(low = "blue", high = "red", name="SHD", guide = guide_legend())+
  theme_bw()+xlab("R")+ylab("structural hamming distance")+
  theme(legend.position="bottom")
parallel::stopCluster(cl)