Chapter 7 Comparing networks
7.1 Comparing multiple networks
The inference including hundreds of genes should be interpreted with caution especially for the low bootstrap number. For assessing the network stability, the users can compare and output F-measure by the function compareBNs
, which accepts the list of multiple networks (bn) and output the pairwise F-measures. In this analysis, We perform the inference five times for 100 and 500 bootstrap numbers in the specific reactome.
<- c()
fvres <- parallel::makeCluster(12)
cl for (R in c(200,500)){
<- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
n1 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
n2 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
n3 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
n4 <- bngeneplot(pway, vstedTCGA, pathNum=13, returnNet=T, R=R, cl=cl)
n5 <- length(n1$av$nodes)
geneNum
<- list(n1$av, n2$av, n3$av, n4$av, n5$av)
nets <- list(as.igraph(n1$av), as.igraph(n2$av), as.igraph(n3$av), as.igraph(n4$av), as.igraph(n5$av))
igList <- nets
allnets <- compareBNs(nets)
fvalues <- rbind(fvres, c("TCGA", 13, geneNum, R, "default", mean(fvalues), sd(fvalues)))
fvres
## Threshold 0.8
<- averaged.network(n1$str, threshold=0.8)
n1av <- averaged.network(n2$str, threshold=0.8)
n2av <- averaged.network(n3$str, threshold=0.8)
n3av <- averaged.network(n4$str, threshold=0.8)
n4av <- averaged.network(n5$str, threshold=0.8)
n5av
<- list(n1av, n2av, n3av, n4av, n5av)
nets <- compareBNs(nets)
fvalues <- rbind(fvres, c("TCGA", 13, geneNum, R, 0.8, mean(fvalues), sd(fvalues)))
fvres
}<- data.frame(fvres)
resDF colnames(resDF) <- c("dataset","path_number","gene_number","R","threshold","mean","sd")
<- sapply(resDF$path_number, function(x) pway@result$Description[as.numeric(x)])
pathway_name $label <- paste0(resDF$gene_number, "\n(", stringr::str_wrap(pathway_name,10),")")
resDF::stopCluster(cl)
parallelkable(resDF)
dataset | path_number | gene_number | R | threshold | mean | sd | label |
---|---|---|---|---|---|---|---|
TCGA | 13 | 48 | 200 | default | 0.879001362810166 | 0.0268588783377082 | 48 (RHO GTPases Activate Formins) |
TCGA | 13 | 48 | 200 | 0.8 | 0.910779196545874 | 0.0154934340979147 | 48 (RHO GTPases Activate Formins) |
TCGA | 13 | 48 | 500 | default | 0.910205528150544 | 0.00830851399185569 | 48 (RHO GTPases Activate Formins) |
TCGA | 13 | 48 | 500 | 0.8 | 0.936679898625571 | 0.0195192542150308 | 48 (RHO GTPases Activate Formins) |
Plot the result for the visual assessment.
<- resDF %>% filter(threshold=="default") %>%
defa ggplot(aes(x=label,
y=as.numeric(mean),
group=dataset,
color=dataset,
bg.color=dataset,
segment.color=dataset)
+
)geom_jitter(size=4, width=0.1) +
theme_minimal() +
geom_text_repel(aes(label = R),
color = "white", size=4,
bg.r = .15) +
scale_color_brewer(palette = "Set1",
name = "Dataset",
# The same color scall will apply to both of these aesthetics.
aesthetics = c("color","bg.color","segment.color"))+
geom_hline(yintercept=0.8, lty=3, color="red", size=1) +
xlab("Gene number") + ylab("F-measure") + ggtitle("Default strength threshold")
<- resDF %>% filter(threshold=="0.8") %>%
high ggplot(aes(x=label,
y=as.numeric(mean),
group=dataset,
color=dataset,
bg.color=dataset,
segment.color=dataset)
+
)geom_jitter(size=4, width=0.1) +
theme_minimal() +
geom_text_repel(aes(label = R),
color = "white", size=4,
bg.r = .15) +
scale_color_brewer(palette = "Set1",
name = "Dataset",
# The same color scall will apply to both of these aesthetics.
aesthetics = c("color","bg.color","segment.color"))+
geom_hline(yintercept=0.8, lty=3, color="red", size=1) +
xlab("Gene number") + ylab("F-measure") + ggtitle("Strength threshold = 0.8")
/ high defa
Take the intersection of networks.
as.bn(Reduce(igraph::intersection, igList))
FALSE
FALSE Random/Generated Bayesian network
FALSE
FALSE model:
FALSE [NUP107][CDC20][KNL1][KIF2C|CDC20][BUB1B|KNL1][BIRC5|CDC20:KIF2C][CENPF|KNL1:KIF2C:BUB1B]
FALSE [CDCA8|CDC20:KIF2C][ZWILCH|KNL1:BUB1B][CENPA|BIRC5:CDCA8:KIF2C][CENPE|CENPF:KNL1:BUB1B]
FALSE [BUB1|CENPF:KIF2C:BUB1B][AURKB|BIRC5:CDC20][CENPI|CDCA8:KNL1:CENPE][KIF18A|CENPE:AURKB]
FALSE [CENPK|CENPE:BUB1B][NUP85|BIRC5:CENPE][SPC25|BIRC5:AURKB][MAD2L1|BIRC5:CENPE:ZWILCH]
FALSE [SPDL1|CENPK:BUB1B][DSN1|NUP85:KIF2C][SPC24|BIRC5:KIF2C:SPC25:AURKB]
FALSE [SGO2|KIF18A:KNL1:BUB1:AURKB][PLK1|BIRC5:CENPA:CDC20:SPC25][TUBA1C|CDC20:SPC25]
FALSE [NUP133|CENPF:CENPE:SPC24][SRF|TUBA1C][SGO1|TUBA1C:BUB1][CENPU|DSN1:MAD2L1]
FALSE [CENPH|BIRC5:CENPA:CENPK:SPC25:SPC24:MAD2L1:ZWILCH]
FALSE [KNTC1|BIRC5:CENPK:NUP85:KNL1:CENPE:DSN1][NUP37|CENPF:CENPH:TUBA1C:ZWILCH:KNTC1]
FALSE [CENPO|CENPA:NUP85:CENPU:MAD2L1][RHOB|SRF:SGO1][INCENP|CDCA8:KNL1:CENPH:SPC24:MAD2L1:BUB1]
FALSE [SKA1|BIRC5:SGO1:SPC24:BUB1:ZWILCH][CKAP5|NUP133:KIF18A:DSN1]
FALSE [NDC80|CENPA:CENPK:SKA1:SPC24:MAD2L1:AURKB][XPO1|CENPO:CENPE:SPC24:SGO2]
FALSE [NUF2|CENPA:CENPF:SGO1:SKA1:TUBA1C:AURKB][BUB3|NUP37][ERCC6L|CENPI:SRF:SGO1:SKA1:TUBA1C]
FALSE [CENPQ|NUP37:NDC80:CENPI:MAD2L1][ZWINT|NDC80:SGO1:DSN1][DIAPH3|NDC80:CENPI:SRF:SGO2]
FALSE [AHCTF1|NDC80:BIRC5:CENPF:CENPE][CENPL|XPO1:CENPO:NUF2:DSN1:AHCTF1:CKAP5]
FALSE [RCC2|NUP133:CENPI:CENPF:CDCA8:DIAPH3:CENPU:BUB1]
FALSE nodes: 48
FALSE arcs: 150
FALSE undirected arcs: 0
FALSE directed arcs: 150
FALSE average markov blanket size: 12.42
FALSE average neighbourhood size: 6.25
FALSE average branching factor: 3.12
FALSE
FALSE generation algorithm: Empty
The same can be done for bnpathplot
.
<- parallel::makeCluster(12)
cl <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
n1 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
n2 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
n3 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
n4 <- bnpathplot(pway, vstedTCGA, nCategory = 15, returnNet=T, R=200, cl=cl)
n5 <- list(n1$av, n2$av, n3$av, n4$av, n5$av)
nets <- compareBNs(nets)
fvalues mean(fvalues)
FALSE [1] 0.9752001
::stopCluster(cl) parallel
7.2 Testing R
We provide the diagnostic function of inferred network across multiple bootstrap numbers. We can test how R values affect the resulting network. It is very time consuming to test many genes, thus the setting up cl
argument using library parallel
is suggested. We can specify which scoring function to use by scoreType
. Additionally, returned strength data frame as well as raw data frame can be used to assess various metrics and thresholds like structural hamming distance.
<- parallel::makeCluster(6)
cl <- bnpathtest(results = pway, algo="hc",
pathTest exp = vsted, Rrange = seq(10, 300, 10), nCategory = 15,
expSample = incSample,
expRow = "ENSEMBL", cl = cl)
# Plot
<- sapply(pathTest$graph, function(x) sapply(pathTest$graph, function(y) length(E(difference(as.igraph(x), as.igraph(y))))))
difEdges <- data.frame(difEdges[dim(difEdges)[1], colnames(difEdges)])
difEdges colnames(difEdges) <- c("NumDif")
ggplot(difEdges, aes(x=as.numeric(substring(rownames(difEdges), 2)), y=NumDif)) +
geom_line(group=1)+
geom_point(aes(color=NumDif), size=5)+
scale_color_gradient(low = "blue", high = "red", name="Number of different edges", guide = guide_legend())+
theme_bw()+xlab("R")+ylab("Number of different edges")+
theme(legend.position="bottom")
::stopCluster(cl) parallel
## Which pathway?
paste(pway@result[15,]$Description, pway@result[15,]$Count)
FALSE [1] "Homologous DNA Pairing and Strand Exchange 22"
## From 10 to 200
= parallel::makeCluster(6)
cl <- bngenetest(results = pway, algo="hc", pathNum=15,
geneTest exp = vsted, Rrange=seq(10, 200, 10),
expSample = incSample,
expRow = "ENSEMBL", cl = cl)
<- sapply(geneTest$graph, function(x) sapply(geneTest$graph, function(y) shd(x, y)))
difSHD # Plot SHD compared to the highest R network
<- data.frame(difSHD[dim(difSHD)[1], colnames(difSHD)])
difSHD colnames(difSHD) <- c("SHD")
ggplot(difSHD, aes(x=as.numeric(substring(rownames(difSHD), 2)), y=SHD)) +
geom_line(group=1)+
geom_point(aes(color=SHD), size=5)+
scale_color_gradient(low = "blue", high = "red", name="SHD", guide = guide_legend())+
theme_bw()+xlab("R")+ylab("structural hamming distance")+
theme(legend.position="bottom")
::stopCluster(cl) parallel