小编hsr*_*ran的帖子

使用 R 抓取数据时处理 TripAdvisor 中的“更多”按钮

我正在使用以下代码从 TripAdvisor 中提取数据。

# Dependencies ----
# Install a package only when it is missing, then attach it. An unconditional
# install.packages() re-downloads the package on every run and fails when no
# network / CRAN mirror is available.
# NOTE(review): xmlparsedata and XML are not referenced in the visible part of
# this script; they are kept because the rest of the file may rely on them.
if (!requireNamespace("rvest", quietly = TRUE)) {
  install.packages("rvest")
}
library(rvest)
if (!requireNamespace("xmlparsedata", quietly = TRUE)) {
  install.packages("xmlparsedata")
}
library(xmlparsedata)
if (!requireNamespace("xml2", quietly = TRUE)) {
  install.packages("xml2")
}
library(xml2)
if (!requireNamespace("XML", quietly = TRUE)) {
  install.packages("XML")
}
library(XML)

# Address of the review page to scrape. It is assembled with paste0() so the
# long URL can wrap in the source without embedding whitespace: a string
# literal broken across two lines would carry a space and a newline into the
# address.
url.1 <- paste0(
  "https://www.tripadvisor.ie/Restaurant_Review-g186605-d4046860-",
  "Reviews-The_Stage_Door_Cafe-Dublin_County_Dublin.html"
)

# Download and parse the page at url.1, then keep every node matching
# ".innerBubble" inside the element with id "REVIEWS".
reviews <- html_nodes(read_html(url.1), "#REVIEWS .innerBubble")

# "id" attribute of the link found under ".quote" in each review node.
id <- html_attr(html_node(reviews, ".quote a"), "id")

# Text of the <span> found under ".quote" in each review node.
# NOTE(review): this variable shares its name with base::quote().
quote <- html_text(html_node(reviews, ".quote span"))

# Read the "alt" attribute of each rating node, strip the " of 5 stars"
# suffix, and convert what is left to an integer.
rating <- as.integer(
  gsub(
    " of 5 stars",
    "",
    html_attr(html_node(reviews, ".rating .rating.bubble"), "alt")
  )
)

# Parse the "title" attribute of each relative-date node with the format
# "%b %d, %Y" and store the result as POSIXct.
# NOTE(review): "%b" matches month abbreviations of the current locale, and
# this variable shares its name with base::date().
date <- as.POSIXct(
  strptime(
    html_attr(html_node(reviews, ".ratingDate .relativeDate"), "title"),
    "%b %d, %Y"
  )
)

# Text of the ".partial_entry" element found under ".entry" in each review
# node.
review <- html_text(html_node(reviews, ".entry .partial_entry"))

a.1 <- data.frame(id, …
Run Code Online (Sandbox Code Playgroud)

r

5
推荐指数
0
解决办法
439
查看次数

标签 统计

r ×1