我正在使用以下代码从 TripAdvisor 网站抓取数据。
install.packages("rvest")
library(rvest)
install.packages("xmlparsedata")
library(xmlparsedata)
install.packages("xml2")
library(xml2)
install.packages("XML")
library(XML)
# Address of the TripAdvisor review page to scrape.
# The URL is assembled with paste0() so the long address can be wrapped in the
# source without a line break ending up inside the string itself (a string
# literal that spans two lines contains a "\n", which yields an invalid URL).
url.1 <- paste0(
  "https://www.tripadvisor.ie/Restaurant_Review-g186605-d4046860-",
  "Reviews-The_Stage_Door_Cafe-Dublin_County_Dublin.html"
)
# Download and parse the page at url.1, then select every node that matches
# the CSS selector "#REVIEWS .innerBubble". The fields extracted further down
# are all read from this node set.
reviews <- html_nodes(read_html(url.1), "#REVIEWS .innerBubble")
# Review identifier: the "id" attribute of the ".quote a" node inside each
# selected review node.
id <- html_attr(html_node(reviews, ".quote a"), "id")
# Review headline: the text of the ".quote span" node inside each review node.
# NOTE(review): this variable shares its name with base::quote().
quote <- html_text(html_node(reviews, ".quote span"))
# Star rating: read the "alt" attribute of the ".rating .rating.bubble" node,
# strip the " of 5 stars" suffix, and convert what is left to an integer.
rating <- as.integer(
  gsub(
    " of 5 stars",
    "",
    html_attr(html_node(reviews, ".rating .rating.bubble"), "alt")
  )
)
# Review date: read the "title" attribute of the ".ratingDate .relativeDate"
# node, parse it with the format "%b %d, %Y", and store the result as POSIXct.
# NOTE(review): "%b" matches month names in the session locale, and this
# variable shares its name with base::date().
date <- as.POSIXct(
  strptime(
    html_attr(html_node(reviews, ".ratingDate .relativeDate"), "title"),
    format = "%b %d, %Y"
  )
)
# Review body: the text of the ".entry .partial_entry" node inside each
# review node.
review <- html_text(html_node(reviews, ".entry .partial_entry"))
a.1 <- data.frame(id, …
Run Code Online (Sandbox Code Playgroud) r ×1