# ця бібліотека дозволяє підключатися до Facebook API
# інсталювати краще версію з гітхабу, вона актуальніша
library(devtools)
install_github("pablobarbera/Rfacebook/Rfacebook")
## Skipping install of 'Rfacebook' from a github remote, the SHA1 (c7cd323d) has not changed since last install.
## Use `force = TRUE` to force installation
library(Rfacebook)
## Loading required package: httr
## Loading required package: rjson
## Loading required package: httpuv
## Warning: package 'httpuv' was built under R version 3.3.2
##
## Attaching package: 'Rfacebook'
## The following object is masked from 'package:methods':
##
## getGroup
# бібліотека для різних маніпуляцій з даними
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.3.2
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.3.2
## Warning: package 'tibble' was built under R version 3.3.2
## Warning: package 'tidyr' was built under R version 3.3.2
## Warning: package 'readr' was built under R version 3.3.2
## Warning: package 'purrr' was built under R version 3.3.2
## Warning: package 'dplyr' was built under R version 3.3.2
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
# Our spreadsheet with the data about the groups
fb <- readxl::read_excel("fb_maidan.xlsx")
# The token is obtained here - https://developers.facebook.com/tools/explorer/
# NOTE(review): the access token is hard-coded in the source; consider loading
# it from an environment variable or a file kept out of version control.
token <- "EAACEdEose0cBAOHNi9onKY8kyZCZBcNZAsI1ZBRJvMpc2yEKTPzcZB6GBCHEOsmqDUuY5oszZAGZAbFIewz0QbzsRrPgw9odVlkUHQi8E7KHIa6o0Ek6ubITlzAHbhhvXiOw8FtZAOPuwLQGMYMZBvZAGHd0zKzgySTLIuIsETvYVzrPmRJcJxUdnqxrzPE3YEiS0ZD"
Створимо тестові масиви.
# Example: posts from a group (up to 1000 are requested)
example_fb <- getGroup("202155769980250",token,n = 1000)
# Members of the same group.
# Depends on the group's settings: the API may return information about all
# of the group's members, or it may not.
members <- getMembers("202155769980250",token,n = 10000000)
# Example: posts from a public page (n is set very high to request everything)
example_fb_page <-getPage("Euromaydan.Odessa",token,n = 10000000000000000)
Подивимось на структуру отриманих масивів
# Show the columns and column types of the group posts table
glimpse(example_fb)
## Observations: 1,000
## Variables: 10
## $ from_id <chr> "135146317033206", "706464436081662", "31182211...
## $ from_name <chr> "Елизавета Сидорова", "Дмитрий Фортунатов", "Ar...
## $ message <chr> "http://media-collider.com/tr/2017/07/17/turech...
## $ created_time <chr> "2017-07-25T14:24:31+0000", "2017-07-25T13:55:0...
## $ type <chr> "link", "link", "link", "video", "link", "link"...
## $ link <chr> "http://media-collider.com/tr/2017/07/17/turech...
## $ id <chr> "202155769980250_695391420656680", "20215576998...
## $ likes_count <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 3, 0, 0, 0, 2,...
## $ comments_count <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ shares_count <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,...
# Show the columns and column types of the group members table
glimpse(members)
## Observations: 1
## Variables: 5
## $ name <chr> "Михаил Михайлов"
## $ first_name <chr> "Михаил"
## $ last_name <chr> "Михайлов"
## $ id <chr> "237075823291013"
## $ administrator <lgl> FALSE
# Show the columns and column types of the public page posts table
glimpse(example_fb_page)
## Observations: 1,347
## Variables: 10
## $ from_id <chr> "736807679676151", "736807679676151", "73680767...
## $ from_name <chr> "Евромайдан Одесса", "Евромайдан Одесса", "Евро...
## $ message <chr> NA, "http://m.ostro.org/video/527833/", NA, NA,...
## $ created_time <chr> "2017-07-01T16:08:57+0000", "2017-06-30T09:28:5...
## $ type <chr> "link", "link", "photo", "link", "link", "link"...
## $ link <chr> "http://www.megafon.od.ua/2017/07/01/v-odesskoi...
## $ id <chr> "736807679676151_1696992750324301", "7368076796...
## $ likes_count <dbl> 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ comments_count <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ shares_count <dbl> 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
Інформацію про лайки, коментарі та поширення потрібно завантажувати окремо.
# Written from the start in the form needed for a bulk download:
# fetch every post of the group individually, then stack the per-post tables.
posts <- lapply(example_fb$id, function(x) getPost(x, token, 1000000000000000))
# Each element of `posts` carries its own `likes` and `comments` tables.
# They are bound separately; a chained `comments <- likes <- ...` assignment
# would overwrite `likes` with the comments table.
likes <- do.call("rbind", lapply(posts, function(post) post[["likes"]]))
comments <- do.call("rbind", lapply(posts, function(post) post[["comments"]]))
# Shares are read through a separate call
shares <- do.call(
  "rbind",
  lapply(example_fb$id, function(x) getShares(x, token, 1000000000000000))
)
Поглянемо на структуру цих масивів
# Show the columns and column types of the likes table
glimpse(likes)
## Observations: 136
## Variables: 7
## $ from_id <chr> "524386777696235", "829292903786912", "73716717...
## $ from_name <chr> "Simon Ivanov", "Aleksandr Trotsyuk", "Олександ...
## $ message <chr> "Всегда это \"чуть\"", "http://www.metacafe.com...
## $ created_time <chr> "2017-07-25T14:10:43+0000", "2017-07-23T23:25:2...
## $ likes_count <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,...
## $ comments_count <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,...
## $ id <chr> "695385490657273", "694555190740303", "69315548...
# Show the columns and column types of the comments table
glimpse(comments)
## Observations: 136
## Variables: 7
## $ from_id <chr> "524386777696235", "829292903786912", "73716717...
## $ from_name <chr> "Simon Ivanov", "Aleksandr Trotsyuk", "Олександ...
## $ message <chr> "Всегда это \"чуть\"", "http://www.metacafe.com...
## $ created_time <chr> "2017-07-25T14:10:43+0000", "2017-07-23T23:25:2...
## $ likes_count <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,...
## $ comments_count <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,...
## $ id <chr> "695385490657273", "694555190740303", "69315548...
# Show the columns and column types of the shares table
glimpse(shares)
## Observations: 5
## Variables: 4
## $ from_id <chr> "664422470424233", "664422470424233", "66442247042...
## $ from_name <chr> "Сплетни Новогродовка", "Сплетни Новогродовка", "С...
## $ shared_time <lgl> NA, NA, NA, NA, NA
## $ id <chr> "664422470424233_682928161906997", "66442247042423...
З «ВКонтакте» (VK) все трохи складніше: готової бібліотеки немає, тому покладаємося на власні сили.
Функція для отримання постів зі сторінки
# Download VK wall posts with ids id_min..id_max (inclusive) in batches and
# return them as one data frame.
#
# Args:
#   id_min:  id of the first post to request.
#   id_max:  id of the last post to request (inclusive).
#   id_step: how many post ids go into one wall.getById request.
#
# Returns:
#   The row-bound results of wall2df() over all batches.
#
# Besides its arguments the function reads `group_id`, `access_token` and
# `sleep_time` from the enclosing environment. `access_token` is pasted into
# the query string as-is, so it presumably already has the form
# "access_token=..." -- TODO confirm where it is defined.
get_wall_posts <- function(id_min, id_max, id_step = 100) {
  if (id_min > id_max) {
    stop("id_min must not be greater than id_max", call. = FALSE)
  }
  extended <- paste0("extended=", 0)
  copy_depth <- paste0("copy_history_depth=", 1)
  cat(id_min, "-", id_max, ": ")

  # First id of every batch; the last batch may be shorter than id_step.
  batch_starts <- seq(id_min, id_max, by = id_step)
  batches <- vector("list", length(batch_starts))

  for (k in seq_along(batch_starts)) {
    id_lo <- batch_starts[k]
    # Clamp so that ids beyond id_max are never requested.
    id_hi <- min(id_lo + id_step - 1, id_max)
    cat(id_hi, ". ")

    posts <- paste0(
      "posts=",
      paste0("-", group_id, "_", id_lo:id_hi, collapse = ",")
    )
    request <- paste(
      "https://api.vk.com/method/wall.getById?v=4.9",
      posts, extended, copy_depth, access_token,
      sep = "&"
    )
    posts_list <- fromJSON(getURL(request))
    batches[[k]] <- wall2df(posts_list$response)

    # Pause between requests, but not after the last one.
    if (k < length(batch_starts)) Sys.sleep(sleep_time)
  }

  # Bind once at the end instead of growing a data frame inside the loop.
  # Batches that returned no posts are skipped so they cannot interfere with
  # the column layout of the result.
  non_empty <- Filter(function(batch) nrow(batch) > 0, batches)
  if (length(non_empty) == 0) {
    return(batches[[1]])
  }
  do.call(rbind, non_empty)
}
# Convert a list of wall posts (each post being a list of fields) into a data
# frame with one row per post.
#
# Args:
#   wall: list of posts. The fields read from each post are `id`, `from_id`,
#         `created_by`, `signer_id`, `post_type`, `comments$count`,
#         `likes$count`, `reposts$count`, `date` and `text`.
#
# Returns:
#   A data frame with the columns uid, author, whodidthis, type, comments,
#   likes, reposts, date and text. A field that is absent from a post becomes
#   NA. An empty `wall` gives a zero-row data frame with the same columns.
wall2df <- function(wall) {
  # One numeric value per post; a missing field becomes NA.
  pick_num <- function(getter) {
    vapply(wall, function(post) {
      value <- getter(post)
      if (is.null(value)) NA_real_ else as.numeric(value)
    }, numeric(1), USE.NAMES = FALSE)
  }
  # One character value per post; a missing field becomes NA.
  pick_chr <- function(getter) {
    vapply(wall, function(post) {
      value <- getter(post)
      if (is.null(value)) NA_character_ else as.character(value)
    }, character(1), USE.NAMES = FALSE)
  }

  # `[[` is used instead of `$` so that field names are matched exactly.
  data.frame(
    uid = pick_num(function(post) post[["id"]]),
    author = pick_num(function(post) post[["from_id"]]),
    # Prefer `created_by`; fall back to `signer_id`; NA when both are absent.
    whodidthis = pick_num(function(post) {
      if (!is.null(post[["created_by"]])) {
        post[["created_by"]]
      } else {
        post[["signer_id"]]
      }
    }),
    type = pick_chr(function(post) post[["post_type"]]),
    comments = pick_num(function(post) post[["comments"]][["count"]]),
    likes = pick_num(function(post) post[["likes"]][["count"]]),
    reposts = pick_num(function(post) post[["reposts"]][["count"]]),
    date = pick_num(function(post) post[["date"]]),
    text = pick_chr(function(post) post[["text"]]),
    # Keep text columns as character on R versions older than 4.0 as well.
    stringsAsFactors = FALSE
  )
}
group_id <- 33305945 # id of the Karlsberg group
# Range of post ids to download
id_min <- 1
id_max <- 2
# NOTE(review): this reuses the name `posts`, replacing the list of Facebook
# posts assigned earlier in the script. get_wall_posts() also reads
# `access_token` and `sleep_time`, which are not assigned anywhere in the
# visible part of the file -- confirm they are defined before this call.
posts <- get_wall_posts(id_min, id_max)
# For every post in `posts`, collect the users who liked it, the users who
# commented on it, and the users who liked those comments.
#
# Args:
#   posts: data frame with a `uid` column holding the post ids.
#
# Returns:
#   A list with one element per row of `posts`; each element is a list with
#   the entries `likers`, `commenters` and `comments_likers`. The last two
#   are NULL when the post has no comments.
#
# Reads `group_id` from the enclosing environment and prints progress with
# cat(). Sleeps for 10 seconds after every 200 posts.
get_likers_commenters <- function(posts) {
  # Fetch a URL and parse the JSON body.
  fetch <- function(url) fromJSON(getURL(url))

  n_posts <- nrow(posts)
  posts_likers_commenters <- vector("list", n_posts)
  cat("1-", n_posts, ": ", sep = "")

  # seq_len() makes the loop a no-op for an empty `posts`.
  for (i in seq_len(n_posts)) {
    request_likers <- paste0(
      "https://api.vk.com/method/likes.getList?owner_id=-",
      group_id, "&type=post&item_id=", posts$uid[i]
    )
    likers <- fetch(request_likers)$response$users

    request_comments <- paste0(
      "https://api.vk.com/method/wall.getComments?v=5.50&owner_id=-",
      group_id, "&post_id=", posts$uid[i]
    )
    comments <- fetch(request_comments)

    commenters <- NULL
    comments_likers <- NULL
    n_comments <- comments$response$count
    # A reply without `response$count` is treated as "no comments".
    if (!is.null(n_comments) && n_comments > 0) {
      items <- comments$response$items
      commenters <- unlist(lapply(items, function(comment) comment$from_id))
      comments_ids <- unlist(lapply(items, function(comment) comment$id))
      # One likes.getList request per comment; results are flattened into a
      # single vector of user ids.
      comments_likers <- unlist(lapply(comments_ids, function(comment_id) {
        request_comments_likers <- paste0(
          "https://api.vk.com/method/likes.getList?owner_id=-",
          group_id, "&type=comment&item_id=",
          comment_id
        )
        fetch(request_comments_likers)$response$users
      }))
    }

    posts_likers_commenters[[i]] <- list(
      likers = likers,
      commenters = commenters,
      comments_likers = comments_likers
    )
    if (i %% 25 == 0) cat(i, " . ")
    if (i %% 200 == 0) Sys.sleep(10)
  }
  posts_likers_commenters
}
# Prepare the lookup tables: give the id and name columns table-specific
# names and convert the ids to integer so they can serve as join keys.
countries <- countries %>%
  rename(country_id = cid, country = name) %>%
  mutate(country_id = as.integer(country_id))
cities <- cities %>%
  rename(city_id = cid, city = name) %>%
  mutate(city_id = as.integer(city_id))

# Attach the city and country names to every member.
members <- members %>%
  left_join(cities, by = "city_id") %>%
  left_join(countries, by = "country_id")

# Members without a match get an explicit "not specified" label.
members$country <- replace(members$country, is.na(members$country), "не вказана")
members$city <- replace(members$city, is.na(members$city), "не вказане")

# Age in whole years from the birth date; values above 100 are set to NA.
age_in_days <- as.numeric(difftime(now(), members$bdate, units = "days"))
members$age <- floor(age_in_days / 365.25)
members$age <- replace(members$age, members$age > 100, NA)
# Add all of this to the primary data table as list columns, one entry per
# post. lapply() always returns a list; sapply() would collapse the result
# into a matrix whenever every post has the same number (> 1) of ids, and the
# column assignment would then fail.
posts$likers <- lapply(posts_likers_commenters, function(plc) plc$likers)
posts$commenters <- lapply(posts_likers_commenters, function(plc) plc$commenters)
posts$comments_likers <- lapply(
  posts_likers_commenters,
  function(plc) plc$comments_likers
)