# Load the three scraped forum threads (one CSV per university) and stack them
# into a single table of posts, tagging each post with its university.
thread_dir <- file.path(
  "/srv/store/students/vvsuschevskiy", "noobsQA", "littleone",
  "littleone_csv_with_names"
)
# Names become the values of the `uni` column via bind_rows(.id = ...).
thread_files <- c(
  hse = "Thread_6941702_full_346_pages.csv",
  leti = "Thread_7376205_full_529_pages.csv",
  poly = "Thread_5600277_full_506_pages.csv"
)
# file.path() drops names, so they are re-attached explicitly.
thread_paths <- setNames(file.path(thread_dir, thread_files), names(thread_files))

# Only the columns used downstream are parsed. The types are spelled out
# (they are the ones readr previously guessed for all three files) so that
# parsing does not depend on guessing.
thread_cols <- cols_only(
  post_text = col_character(),
  user_name = col_character(),
  answer_to_user = col_character(),
  post_time = col_datetime(format = "")
)

treads <- thread_paths %>%
  lapply(read_csv, col_types = thread_cols) %>%
  bind_rows(.id = "uni") %>%
  select(post_text, uni, user_name, answer_to_user, post_time)

# Drop the loading helpers; only `treads` is needed from here on.
rm(thread_dir, thread_files, thread_paths, thread_cols)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Histogram of posting activity over time (14-day bins), stacked by
# university thread, with a dashed marker at 1 September 2019.
treads %>%
  select(post_time, uni) %>%
  mutate(post_time = as.Date(post_time)) %>%
  ggplot(aes(x = post_time, group = uni, fill = uni)) +
  # geom_density(alpha = 0.4) +
  geom_histogram(binwidth = 14, alpha = 0.7) +
  theme_minimal() +
  scale_x_date(labels = scales::date_format("%Y"), date_breaks = "1 year") +
  theme(axis.text.x = element_text(angle = 0)) +
  geom_vline(
    xintercept = as.Date("2019-09-01"),
    size = 1,
    linetype = "dashed"
  ) +
  # annotate() draws the label exactly once; geom_text(aes(...)) with constant
  # aesthetics would redraw it for every row of the data. geom_text has no
  # `text` argument, so the font size goes through `size` (in mm): 14 pt is
  # 14 / .pt.
  annotate(
    "text",
    x = as.Date("2019-09-01"),
    y = 70,
    label = "\n1 sept",
    colour = "black",
    angle = 90,
    size = 14 / .pt
  )
# Network analysis does not need the post bodies: keep every column except
# `post_text`, then remove the full table from the workspace.
treads_nw <- select(treads, -post_text)
rm(treads)
library(igraph)
library(tidygraph)
##
## Attaching package: 'tidygraph'
## The following objects are masked from 'package:quanteda':
##
## as.igraph, convert
## The following object is masked from 'package:igraph':
##
## groups
## The following object is masked from 'package:stats':
##
## filter
library(ggraph)
# Build the directed reply network for the HSE thread: one edge per
# (user_name -> answer_to_user) pair, weighted by how many posts share it.
#
# Rows with a missing endpoint (presumably posts that do not reply to anyone)
# are dropped first: graph_from_data_frame() would otherwise replace the NA
# with the string "NA" and create a phantom vertex of that name.
nw_hse <- treads_nw %>%
  filter(uni == "hse", !is.na(user_name), !is.na(answer_to_user)) %>%
  # count() returns an ungrouped table, so no regrouping message is emitted.
  count(user_name, answer_to_user, name = "weight") %>%
  graph_from_data_frame()
# Wrap the igraph object as a tidygraph `tbl_graph` so that dplyr-style verbs
# can be applied to its node and edge tables.
nw_hse_tidy <- nw_hse %>% as_tbl_graph()

# Inspect the edge table with the heaviest ties listed first.
nw_hse_tidy %>% activate(edges) %>% arrange(desc(weight))
## # A tbl_graph: 204 nodes and 1069 edges
## #
## # A directed multigraph with 1 component
## #
## # Edge Data: 1,069 x 3 (active)
## from to weight
## <int> <int> <int>
## 1 150 2 173
## 2 63 2 91
## 3 122 2 74
## 4 16 2 64
## 5 177 2 43
## 6 27 2 39
## # … with 1,063 more rows
## #
## # Node Data: 204 x 1
## name
## <chr>
## 1 @ревик
## 2 ***@***
## 3 0123456789
## # … with 201 more rows
# Print the tbl_graph as stored; here the node table is the active one.
nw_hse_tidy
## # A tbl_graph: 204 nodes and 1069 edges
## #
## # A directed multigraph with 1 component
## #
## # Node Data: 204 x 1 (active)
## name
## <chr>
## 1 @ревик
## 2 ***@***
## 3 0123456789
## 4 adelina
## 5 AGRIVA
## 6 ak4olatan
## # … with 198 more rows
## #
## # Edge Data: 1,069 x 3
## from to weight
## <int> <int> <int>
## 1 1 20 1
## 2 2 2 15
## 3 2 27 1
## # … with 1,066 more rows
# Bare-bones picture of the HSE network: edges are drawn first and nodes on
# top of them, with the layout left for ggraph to choose.
nw_hse_tidy %>%
  ggraph() +
  geom_edge_link() +
  geom_node_point() +
  theme_graph()
## Using `stress` as default layout
# Labelled plot of the HSE network using the "graphopt" layout.
# Edge width encodes `weight` (number of posts from one user addressed to
# another), rescaled to the 0.2-2 range; node names are placed with repulsion
# to limit overlap.
ggraph(nw_hse_tidy, layout = "graphopt") +
  geom_node_point() +
  geom_edge_link(aes(width = weight), alpha = 0.8) +
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = name), repel = TRUE) +
  # The weights count replies between forum users, so the legend says so.
  labs(edge_width = "Replies") +
  theme_graph()
library(visNetwork)
# Interactive view of the same network rendered with visNetwork.
visIgraph(nw_hse_tidy)