mtr*_*reg 7 r plotly sunburst-diagram r-plotly
我正在尝试通过 R 使用 Plotly 制作森伯斯特图。我正在努力处理层次结构所需的数据模型,无论是在概念化它是如何工作的方面,还是看看是否有任何简单的方法来转换常规数据框,用代表不同层次级别的列,转换为所需的格式。
我已经查看了 R 中绘图森伯斯特图表的示例,例如,here,并查看了参考页面,但没有完全获得数据格式的模型。
# Create some fake data - say ownership and land use data with acreage
df <- data.frame(ownership=c(rep("private", 3), rep("public",3),rep("mixed", 3)),
landuse=c(rep(c("residential", "recreation", "commercial"),3)),
acres=c(108,143,102, 300,320,500, 37,58,90))
# Just try some quick pie charts of acreage by landuse and ownership
plot_ly(data=df, labels= ~landuse, values= ~acres, type='pie')
plot_ly(data=df, labels= ~ownership, values= ~acres, type='pie')
# This doesn't render anything... not that I'd expect it to given the data format doesn't seem to match what's needed,
# but this is what I'd intuitively expect to work
plot_ly(data=df, labels= ~landuse, parents = ~ownership, values= ~acres, type='sunburst')
Run Code Online (Sandbox Code Playgroud)
根据上面的示例代码或类似代码,了解如何从数据 ( df
) 转换为绘图森伯斯特图所需的格式会很有帮助。
你是绝对正确的,与 plotly 的 R API 的其余直觉用法相比,为森伯斯特(或树状图)图表准备数据相当烦人。
我遇到了同样的问题,写了一个基于library(data.table)
准备数据的函数,接受两种不同的data.frame
输入格式。
所需的格式生成使用类似结构的数据旭日情节一样你可以看到这里下的部分旭日反复标签。
对于您的示例,它应该如下所示:
labels values parents ids
1: total 1658 <NA> total
2: private 353 total total - private
3: public 1120 total total - public
4: mixed 185 total total - mixed
5: residential 108 total - private total - private - residential
6: recreation 143 total - private total - private - recreation
7: commercial 102 total - private total - private - commercial
8: residential 300 total - public total - public - residential
9: recreation 320 total - public total - public - recreation
10: commercial 500 total - public total - public - commercial
11: residential 37 total - mixed total - mixed - residential
12: recreation 58 total - mixed total - mixed - recreation
13: commercial 90 total - mixed total - mixed - commercial
Run Code Online (Sandbox Code Playgroud)
这是到达那里的代码:
library(data.table)
library(plotly)
DF <- data.table(ownership=c(rep("private", 3), rep("public",3),rep("mixed", 3)),
landuse=c(rep(c("residential", "recreation", "commercial"),3)),
acres=c(108, 143, 102, 300, 320, 500, 37, 58, 90))
as.sunburstDF <- function(DF, value_column = NULL, add_root = FALSE){
require(data.table)
colNamesDF <- names(DF)
if(is.data.table(DF)){
DT <- copy(DF)
} else {
DT <- data.table(DF, stringsAsFactors = FALSE)
}
if(add_root){
DT[, root := "Total"]
}
colNamesDT <- names(DT)
hierarchy_columns <- setdiff(colNamesDT, value_column)
DT[, (hierarchy_columns) := lapply(.SD, as.factor), .SDcols = hierarchy_columns]
if(is.null(value_column) && add_root){
setcolorder(DT, c("root", colNamesDF))
} else if(!is.null(value_column) && !add_root) {
setnames(DT, value_column, "values", skip_absent=TRUE)
setcolorder(DT, c(setdiff(colNamesDF, value_column), "values"))
} else if(!is.null(value_column) && add_root) {
setnames(DT, value_column, "values", skip_absent=TRUE)
setcolorder(DT, c("root", setdiff(colNamesDF, value_column), "values"))
}
hierarchyList <- list()
for(i in seq_along(hierarchy_columns)){
current_columns <- colNamesDT[1:i]
if(is.null(value_column)){
currentDT <- unique(DT[, ..current_columns][, values := .N, by = current_columns], by = current_columns)
} else {
currentDT <- DT[, lapply(.SD, sum, na.rm = TRUE), by=current_columns, .SDcols = "values"]
}
setnames(currentDT, length(current_columns), "labels")
hierarchyList[[i]] <- currentDT
}
hierarchyDT <- rbindlist(hierarchyList, use.names = TRUE, fill = TRUE)
parent_columns <- setdiff(names(hierarchyDT), c("labels", "values", value_column))
hierarchyDT[, parents := apply(.SD, 1, function(x){fifelse(all(is.na(x)), yes = NA_character_, no = paste(x[!is.na(x)], sep = ":", collapse = " - "))}), .SDcols = parent_columns]
hierarchyDT[, ids := apply(.SD, 1, function(x){paste(x[!is.na(x)], collapse = " - ")}), .SDcols = c("parents", "labels")]
hierarchyDT[, c(parent_columns) := NULL]
return(hierarchyDT)
}
sunburstDF <- as.sunburstDF(DF, value_column = "acres", add_root = TRUE)
plot_ly(data = sunburstDF, ids = ~ids, labels= ~labels, parents = ~parents, values= ~values, type='sunburst', branchvalues = 'total')
Run Code Online (Sandbox Code Playgroud)
下面是data.frame
函数接受的第二种格式的示例( value_column = NULL
,因为它是根据数据计算出来的):
DF2 <- data.frame(sample(LETTERS[1:3], 100, replace = TRUE),
sample(LETTERS[4:6], 100, replace = TRUE),
sample(LETTERS[7:9], 100, replace = TRUE),
sample(LETTERS[10:12], 100, replace = TRUE),
sample(LETTERS[13:15], 100, replace = TRUE),
stringsAsFactors = FALSE)
plot_ly(data = as.sunburstDF(DF2, add_root = TRUE), ids = ~ids, labels= ~labels, parents = ~parents, values= ~values, type='sunburst', branchvalues = 'total')
Run Code Online (Sandbox Code Playgroud)
另请参阅 library( sunburstR ) 作为替代方案。
编辑:添加了一个关于基于 dplyr 的count_to_sunburst()
函数的基准library(plotme)
(见下文),在我的系统上它比data.table
版本慢 5 倍左右。
Unit: milliseconds
expr min lq mean median uq max neval
plotme 50.4618 53.09425 60.92404 55.37815 63.62315 122.3842 100
ismirsehregal 8.6553 10.28870 12.63881 11.53760 12.26620 108.2025 100
Run Code Online (Sandbox Code Playgroud)
重现基准的代码:
# devtools::install_github("yogevherz/plotme")
library(microbenchmark)
library(plotme)
library(dplyr)
library(data.table)
library(plotly)
DF <- data.frame(ownership=c(rep("private", 3), rep("public",3),rep("mixed", 3)),
landuse=c(rep(c("residential", "recreation", "commercial"),3)),
acres=c(108, 143, 102, 300, 320, 500, 37, 58, 90))
as.sunburstDF <- function(DF, value_column = NULL, add_root = FALSE){
require(data.table)
colNamesDF <- names(DF)
if(is.data.table(DF)){
DT <- copy(DF)
} else {
DT <- data.table(DF, stringsAsFactors = FALSE)
}
if(add_root){
DT[, root := "Total"]
}
colNamesDT <- names(DT)
hierarchy_columns <- setdiff(colNamesDT, value_column)
DT[, (hierarchy_columns) := lapply(.SD, as.factor), .SDcols = hierarchy_columns]
if(is.null(value_column) && add_root){
setcolorder(DT, c("root", colNamesDF))
} else if(!is.null(value_column) && !add_root) {
setnames(DT, value_column, "values", skip_absent=TRUE)
setcolorder(DT, c(setdiff(colNamesDF, value_column), "values"))
} else if(!is.null(value_column) && add_root) {
setnames(DT, value_column, "values", skip_absent=TRUE)
setcolorder(DT, c("root", setdiff(colNamesDF, value_column), "values"))
}
hierarchyList <- list()
for(i in seq_along(hierarchy_columns)){
current_columns <- colNamesDT[1:i]
if(is.null(value_column)){
currentDT <- unique(DT[, ..current_columns][, values := .N, by = current_columns], by = current_columns)
} else {
currentDT <- DT[, lapply(.SD, sum, na.rm = TRUE), by=current_columns, .SDcols = "values"]
}
setnames(currentDT, length(current_columns), "labels")
hierarchyList[[i]] <- currentDT
}
hierarchyDT <- rbindlist(hierarchyList, use.names = TRUE, fill = TRUE)
parent_columns <- setdiff(names(hierarchyDT), c("labels", "values", value_column))
hierarchyDT[, parents := apply(.SD, 1, function(x){fifelse(all(is.na(x)), yes = NA_character_, no = paste(x[!is.na(x)], sep = ":", collapse = " - "))}), .SDcols = parent_columns]
hierarchyDT[, ids := apply(.SD, 1, function(x){paste(x[!is.na(x)], collapse = " - ")}), .SDcols = c("parents", "labels")]
hierarchyDT[, c(parent_columns) := NULL]
return(hierarchyDT)
}
microbenchmark(plotme = {
DF %>%
rename(n = acres) %>%
count_to_sunburst()
}, ismirsehregal = {
plot_ly(data = as.sunburstDF(DF, value_column = "acres", add_root = TRUE), ids = ~ids, labels= ~labels, parents = ~parents, values= ~values, type='sunburst', branchvalues = 'total')
})
Run Code Online (Sandbox Code Playgroud)