"J'ai effectué un scraping en R mais je rencontre le problème de diviser les données en différentes colonnes. Je ne parviens pas à écrire le code pour la colonne 8:10 (dernière ligne du code). Ci-dessous le code"
library(xml2)
library(rvest)
library(stringr)
library(tidyr)
reddit_wbpg <- read_html("https://www.tripadvisor.in/Hotel_Review-g304551-d3583700-Reviews-or10-Lemon_Tree_Premier_Delhi_Airport-New_Delhi_National_Capital_Territory_of_Delhi.html")
title <- reddit_wbpg %>%
html_node("title") %>%
html_text()
reviews <- reddit_wbpg %>%
html_nodes("q.location-review-review-list-parts-ExpandableReview__reviewText--gOmRC") %>%
html_text()
user_data1 <- reddit_wbpg %>%
html_nodes("div.social-member-event-MemberEventOnObjectBlock__event_type--3njyv") %>%
html_text()
user_data2 <- reddit_wbpg %>%
html_nodes("div.social-member-MemberHeaderStats__event_info--30wFs") %>%
html_text()
review_title <- reddit_wbpg %>%
html_nodes("div.location-review-review-list-parts-ReviewTitle__reviewTitle--2GO9Z") %>%
html_text()
scraping_data <- data.frame(page_title= title, review_title = review_title, reviews = reviews, user_data1 = user_data1,user_data2 = user_data2)
scraping_data <- cbind(scraping_data,"a","a","a","a","a")
colnames(scraping_data)[6:10] <- c("user_name", "date", "location", "contribution" , "helpful_votes")
scraping_data[,6:7] <- str_split_fixed(scraping_data$user_data1, " wrote a review", 2)
scraping_data[,8] <- str_extract(scraping_data$user_data2,"^.+?(?=[0-9]+ [hc])")
scraping_data[,9] <- str_extract(scraping_data$user_data2,"[0-9]+(?= contributions)")
scraping_data[,10] <- str_extract(scraping_data$user_data2,"[0-9]+(?= helpful votes)")
Le résultat est visible dans l'image ci-jointe :