我想使用来自Wikipedia的数据创建一个图形,于是从找到的表格中创建了一个数据框。它包含两列:啤酒样式和范围(IBU),例如“20-50”。两列都是字符型(character),因此我无法直接用它们画出有意义的图表。我设法将IBU列拆分成两个数值列(最小值和最大值),但这在我的第一个数据框内创建了第二个数据框。我试图寻找类似的情况,但没有找到;我现在被卡住了,不知道下一步该怎么做 :( 抱歉粘贴了这么多代码,我只是想让别人能够读取数据并查看其结构。
library(xml2)
library(rvest)
library(ggplot2)
library(tidyr)
# Scrape the IBU table from the Polish Wikipedia article ----
file_html <- read_html(
  "https://pl.wikipedia.org/wiki/International_Bittering_Units",
  encoding = "UTF-8"
)
table_html <- html_node(file_html, "#mw-content-text > div > table")
table_IBU <- html_table(table_html, fill = TRUE)

# Split the textual range (e.g. "20 – 50") into numeric bounds ----
# Strip a "+" from the range text before splitting. stringr is not attached
# by the library() calls of this script, so the call is namespace-qualified.
table_IBU$IBU2 <- stringr::str_replace(table_IBU$`Stopień IBU`, "\\+", "")

# separate() returns a complete data frame, not a single column. Keep the
# result as its own flat data frame so it can be handed to ggplot() directly.
# fill = "right" leaves `max` as NA (without a warning) for entries that have
# no " – " separator.
ibu_ranges <- tidyr::separate(
  table_IBU, IBU2,
  into = c("min", "max"), sep = " – ", fill = "right"
)
ibu_ranges$min2 <- as.numeric(ibu_ranges$min)
ibu_ranges$max2 <- as.numeric(ibu_ranges$max)

# Backward compatibility: earlier versions of this script exposed the split
# table only as the nested data-frame column `table_IBU$IBU3`; keep that
# layout available for code that still reads it.
table_IBU$IBU3 <- ibu_ranges
table_IBU <- subset(table_IBU, select = -c(IBU2,
                                           `Stopień IBU`,
                                           `Gatunek piwa`))

# Plot the upper bound of each style, ordered by that bound ----
# geom_point() has no `width` parameter, so it is not passed here. `fill` is
# kept from the original call; it only shows for point shapes 21-25.
IBUgraph <- ggplot(ibu_ranges, aes(reorder(`Gatunek piwa`, max2), max2)) +
  geom_point(color = "darkolivegreen", fill = "darkseagreen4") +
  labs(
    y = "Międzynarodowe Jednostki Goryczy (IBU)",
    x = "Gatunek",
    title = paste0(
      "Skala IBU - International Bitterness Units,\n",
      "czyli międzynarodowe jednostki goryczy"
    )
  ) +
  theme(
    text = element_text(size = 9),
    axis.text.x = element_text(angle = 45, hjust = 1.1)
  )
IBUgraph
Run Code Online (Sandbox Code Playground)
最后,我想使用ggplot()创建一个图形:x轴表示啤酒样式,每种样式显示两个点,分别表示IBU的最小值和最大值。
You can do this, for example, with what is called a dumbbell chart:
# Dumbbell chart: for every beer style draw one point at the lower bound,
# one point at the upper bound, and a segment joining the two.
dumbbell_data <- table_IBU$IBU3

ggplot(data = dumbbell_data, mapping = aes(x = `Gatunek piwa`)) +
  # lower bound of the range
  geom_point(mapping = aes(y = min2)) +
  # upper bound of the range
  geom_point(mapping = aes(y = max2)) +
  # vertical bar from the lower to the upper bound of the same style
  geom_segment(
    mapping = aes(y = min2, yend = max2, xend = `Gatunek piwa`)
  ) +
  # turn the style names sideways so they do not overlap
  theme(axis.text.x = element_text(hjust = 1, angle = 90))
Run Code Online (Sandbox Code Playground)
所以,你在寻找这样的东西吗?
library(dplyr)
library(stringr)
library(tidyr)
library(ggplot2)
library(rvest)
# Acquire table ----
table_IBU <- read_html(
  "https://pl.wikipedia.org/wiki/International_Bittering_Units",
  encoding = "UTF-8"
) %>%
  html_node("#mw-content-text > div > table") %>%
  html_table(fill = TRUE)

# Extract scores into min and max values ----
# Plain `<-` assignment is used instead of `%<>%`: that operator lives in
# magrittr, which this script never attaches (dplyr and rvest only re-export
# `%>%`).
table_IBU <- table_IBU %>%
  # strip a "+" from the range text before splitting
  mutate(IBU2 = str_replace(`Stopień IBU`, "\\+", "")) %>%
  # fill = "right": entries without a " – " separator get max = NA silently
  separate(IBU2, into = c("min", "max"), sep = " – ", fill = "right") %>%
  select(-`Stopień IBU`) %>%
  mutate(min = as.integer(min), max = as.integer(max)) %>%
  # long format: one row per (beer style, limit) pair, so both limits can be
  # drawn by a single geom_point() layer and told apart by colour
  pivot_longer(cols = c(min, max), names_to = "Limit", values_to = "Value")

# Plot ----
ggplot(table_IBU, aes(x = `Gatunek piwa`)) +
  geom_point(aes(y = Value, col = Limit)) +
  xlab("Type of beer") +
  ylab("Score (0-120)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Run Code Online (Sandbox Code Playground)
显示这些数据的方式很奇怪。