Chapter 7 Comparing networks
7.1 Comparing multiple networks
The inference including hundreds of genes should be interpreted with caution especially for the low bootstrap number. For assessing the network stability, the users can compare and output F-measure by the function compareBNs, which accepts the list of multiple networks (bn) and output the pairwise F-measures. In this analysis, We perform the inference five times for 100 and 500 bootstrap numbers in the specific reactome.
fvres <- c()
cl <- parallel::makeCluster(12)
for (R in c(200,500)){
n1 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
n2 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
n3 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
n4 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
n5 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
geneNum <- length(n1$av$nodes)
nets <- list(n1$av, n2$av, n3$av, n4$av, n5$av)
igList <- list(as.igraph(n1$av), as.igraph(n2$av), as.igraph(n3$av), as.igraph(n4$av), as.igraph(n5$av))
allnets <- nets
fvalues <- compareBNs(nets)
fvres <- rbind(fvres, c("TCGA", 13, geneNum, R, "default", mean(fvalues), sd(fvalues)))
## Threshold 0.8
n1av <- averaged.network(n1$str, threshold=0.8)
n2av <- averaged.network(n2$str, threshold=0.8)
n3av <- averaged.network(n3$str, threshold=0.8)
n4av <- averaged.network(n4$str, threshold=0.8)
n5av <- averaged.network(n5$str, threshold=0.8)
nets <- list(n1av, n2av, n3av, n4av, n5av)
fvalues <- compareBNs(nets)
fvres <- rbind(fvres, c("TCGA", 13, geneNum, R, 0.8, mean(fvalues), sd(fvalues)))
}
resDF <- data.frame(fvres)
colnames(resDF) <- c("dataset","path_number","gene_number","R","threshold","mean","sd")
pathway_name <- sapply(resDF$path_number, function(x) pway@result$Description[as.numeric(x)])
resDF$label <- paste0(resDF$gene_number, "\n(", stringr::str_wrap(pathway_name,10),")")
parallel::stopCluster(cl)
kable(resDF)| dataset | path_number | gene_number | R | threshold | mean | sd | label |
|---|---|---|---|---|---|---|---|
| TCGA | 13 | 48 | 200 | default | 0.879001362810166 | 0.0268588783377082 | 48 (RHO GTPases Activate Formins) |
| TCGA | 13 | 48 | 200 | 0.8 | 0.910779196545874 | 0.0154934340979147 | 48 (RHO GTPases Activate Formins) |
| TCGA | 13 | 48 | 500 | default | 0.910205528150544 | 0.00830851399185569 | 48 (RHO GTPases Activate Formins) |
| TCGA | 13 | 48 | 500 | 0.8 | 0.936679898625571 | 0.0195192542150308 | 48 (RHO GTPases Activate Formins) |
Plot the result for the visual assessment.
defa <- resDF %>% filter(threshold=="default") %>%
ggplot(aes(x=label,
y=as.numeric(mean),
group=dataset,
color=dataset,
bg.color=dataset,
segment.color=dataset)
)+
geom_jitter(size=4, width=0.1) +
theme_minimal() +
geom_text_repel(aes(label = R),
color = "white", size=4,
bg.r = .15) +
scale_color_brewer(palette = "Set1",
name = "Dataset",
# The same color scall will apply to both of these aesthetics.
aesthetics = c("color","bg.color","segment.color"))+
geom_hline(yintercept=0.8, lty=3, color="red", size=1) +
xlab("Gene number") + ylab("F-measure") + ggtitle("Default strength threshold")
high <- resDF %>% filter(threshold=="0.8") %>%
ggplot(aes(x=label,
y=as.numeric(mean),
group=dataset,
color=dataset,
bg.color=dataset,
segment.color=dataset)
)+
geom_jitter(size=4, width=0.1) +
theme_minimal() +
geom_text_repel(aes(label = R),
color = "white", size=4,
bg.r = .15) +
scale_color_brewer(palette = "Set1",
name = "Dataset",
# The same color scall will apply to both of these aesthetics.
aesthetics = c("color","bg.color","segment.color"))+
geom_hline(yintercept=0.8, lty=3, color="red", size=1) +
xlab("Gene number") + ylab("F-measure") + ggtitle("Strength threshold = 0.8")
defa / high
Take the intersection of networks.
as.bn(Reduce(igraph::intersection, igList))FALSE
FALSE Random/Generated Bayesian network
FALSE
FALSE model:
FALSE [NUP107][CDC20][KNL1][KIF2C|CDC20][BUB1B|KNL1][BIRC5|CDC20:KIF2C][CENPF|KNL1:KIF2C:BUB1B]
FALSE [CDCA8|CDC20:KIF2C][ZWILCH|KNL1:BUB1B][CENPA|BIRC5:CDCA8:KIF2C][CENPE|CENPF:KNL1:BUB1B]
FALSE [BUB1|CENPF:KIF2C:BUB1B][AURKB|BIRC5:CDC20][CENPI|CDCA8:KNL1:CENPE][KIF18A|CENPE:AURKB]
FALSE [CENPK|CENPE:BUB1B][NUP85|BIRC5:CENPE][SPC25|BIRC5:AURKB][MAD2L1|BIRC5:CENPE:ZWILCH]
FALSE [SPDL1|CENPK:BUB1B][DSN1|NUP85:KIF2C][SPC24|BIRC5:KIF2C:SPC25:AURKB]
FALSE [SGO2|KIF18A:KNL1:BUB1:AURKB][PLK1|BIRC5:CENPA:CDC20:SPC25][TUBA1C|CDC20:SPC25]
FALSE [NUP133|CENPF:CENPE:SPC24][SRF|TUBA1C][SGO1|TUBA1C:BUB1][CENPU|DSN1:MAD2L1]
FALSE [CENPH|BIRC5:CENPA:CENPK:SPC25:SPC24:MAD2L1:ZWILCH]
FALSE [KNTC1|BIRC5:CENPK:NUP85:KNL1:CENPE:DSN1][NUP37|CENPF:CENPH:TUBA1C:ZWILCH:KNTC1]
FALSE [CENPO|CENPA:NUP85:CENPU:MAD2L1][RHOB|SRF:SGO1][INCENP|CDCA8:KNL1:CENPH:SPC24:MAD2L1:BUB1]
FALSE [SKA1|BIRC5:SGO1:SPC24:BUB1:ZWILCH][CKAP5|NUP133:KIF18A:DSN1]
FALSE [NDC80|CENPA:CENPK:SKA1:SPC24:MAD2L1:AURKB][XPO1|CENPO:CENPE:SPC24:SGO2]
FALSE [NUF2|CENPA:CENPF:SGO1:SKA1:TUBA1C:AURKB][BUB3|NUP37][ERCC6L|CENPI:SRF:SGO1:SKA1:TUBA1C]
FALSE [CENPQ|NUP37:NDC80:CENPI:MAD2L1][ZWINT|NDC80:SGO1:DSN1][DIAPH3|NDC80:CENPI:SRF:SGO2]
FALSE [AHCTF1|NDC80:BIRC5:CENPF:CENPE][CENPL|XPO1:CENPO:NUF2:DSN1:AHCTF1:CKAP5]
FALSE [RCC2|NUP133:CENPI:CENPF:CDCA8:DIAPH3:CENPU:BUB1]
FALSE nodes: 48
FALSE arcs: 150
FALSE undirected arcs: 0
FALSE directed arcs: 150
FALSE average markov blanket size: 12.42
FALSE average neighbourhood size: 6.25
FALSE average branching factor: 3.12
FALSE
FALSE generation algorithm: Empty
The same can be done for bnpathplot.
cl <- parallel::makeCluster(12)
n1 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
n2 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
n3 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
n4 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
n5 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
nets <- list(n1$av, n2$av, n3$av, n4$av, n5$av)
fvalues <- compareBNs(nets)
mean(fvalues)FALSE [1] 0.9752001
parallel::stopCluster(cl)7.2 Testing R
We provide the diagnostic function of inferred network across multiple bootstrap numbers. We can test how R values affect the resulting network. It is very time consuming to test many genes, thus the setting up cl argument using library parallel is suggested. We can specify which scoring function to use by scoreType. Additionally, returned strength data frame as well as raw data frame can be used to assess various metrics and thresholds like structural hamming distance.
cl <- parallel::makeCluster(6)
pathTest <- bnpathtest(results = pway, algo="hc",
exp = vsted, Rrange = seq(10, 300, 10), nCategory = 15,
expSample = incSample,
expRow = "ENSEMBL", cl = cl)
# Plot
difEdges <- sapply(pathTest$graph, function(x) sapply(pathTest$graph, function(y) length(E(difference(as.igraph(x), as.igraph(y))))))
difEdges <- data.frame(difEdges[dim(difEdges)[1], colnames(difEdges)])
colnames(difEdges) <- c("NumDif")
ggplot(difEdges, aes(x=as.numeric(substring(rownames(difEdges), 2)), y=NumDif)) +
geom_line(group=1)+
geom_point(aes(color=NumDif), size=5)+
scale_color_gradient(low = "blue", high = "red", name="Number of different edges", guide = guide_legend())+
theme_bw()+xlab("R")+ylab("Number of different edges")+
theme(legend.position="bottom")
parallel::stopCluster(cl)## Which pathway?
paste(pway@result[15,]$Description, pway@result[15,]$Count)FALSE [1] "Homologous DNA Pairing and Strand Exchange 22"
## From 10 to 200
cl = parallel::makeCluster(6)
geneTest <- bngenetest(results = pway, algo="hc", pathNum=15,
exp = vsted, Rrange=seq(10, 200, 10),
expSample = incSample,
expRow = "ENSEMBL", cl = cl)
difSHD <- sapply(geneTest$graph, function(x) sapply(geneTest$graph, function(y) shd(x, y)))
# Plot SHD compared to the highest R network
difSHD <- data.frame(difSHD[dim(difSHD)[1], colnames(difSHD)])
colnames(difSHD) <- c("SHD")
ggplot(difSHD, aes(x=as.numeric(substring(rownames(difSHD), 2)), y=SHD)) +
geom_line(group=1)+
geom_point(aes(color=SHD), size=5)+
scale_color_gradient(low = "blue", high = "red", name="SHD", guide = guide_legend())+
theme_bw()+xlab("R")+ylab("structural hamming distance")+
theme(legend.position="bottom")
parallel::stopCluster(cl)