R Markdown

Start

Data

# Read the first thread export and tag every row with the label "hse".
tread_hse <- read_csv(
  file = "/srv/store/students/vvsuschevskiy/noobsQA/littleone/littleone_csv_with_names/Thread_6941702_full_346_pages.csv"
)
## Parsed with column specification:
## cols(
##   answer_to_post_head = col_character(),
##   answer_to_post_link = col_character(),
##   answer_to_user = col_character(),
##   answer_to_user_link = col_character(),
##   guest = col_logical(),
##   post_text = col_character(),
##   post_time = col_datetime(format = ""),
##   user_link = col_character(),
##   user_name = col_character(),
##   user_title = col_character(),
##   Адрес = col_character(),
##   Регистрация = col_character(),
##   Сообщений = col_double()
## )
# The label is recycled down the whole table.
tread_hse$uni <- "hse"

# Read the second thread export and tag every row with the label "leti".
tread_leti <- read_csv(
  file = "/srv/store/students/vvsuschevskiy/noobsQA/littleone/littleone_csv_with_names/Thread_7376205_full_529_pages.csv"
)
## Parsed with column specification:
## cols(
##   answer_to_post_head = col_character(),
##   answer_to_post_link = col_character(),
##   answer_to_user = col_character(),
##   answer_to_user_link = col_character(),
##   guest = col_logical(),
##   post_text = col_character(),
##   post_time = col_datetime(format = ""),
##   user_link = col_character(),
##   user_name = col_character(),
##   user_title = col_character(),
##   Адрес = col_character(),
##   Регистрация = col_character(),
##   Сообщений = col_double()
## )
# The label is recycled down the whole table.
tread_leti$uni <- "leti"

# Read the third thread export and tag every row with the label "poly".
tread_poly <- read_csv(
  file = "/srv/store/students/vvsuschevskiy/noobsQA/littleone/littleone_csv_with_names/Thread_5600277_full_506_pages.csv"
)
## Parsed with column specification:
## cols(
##   answer_to_post_head = col_character(),
##   answer_to_post_link = col_character(),
##   answer_to_user = col_character(),
##   answer_to_user_link = col_character(),
##   guest = col_logical(),
##   post_text = col_character(),
##   post_time = col_datetime(format = ""),
##   user_link = col_character(),
##   user_name = col_character(),
##   user_title = col_character(),
##   Адрес = col_character(),
##   Регистрация = col_character(),
##   Сообщений = col_double()
## )
# The label is recycled down the whole table.
tread_poly$uni <- "poly"

# Stack the three labelled threads into one table and keep five columns:
# the post text, the thread label, author, addressee and timestamp.
treads <- select(
  bind_rows(tread_hse, tread_leti, tread_poly),
  post_text, uni, user_name, answer_to_user, post_time
)

# The per-thread tables have been merged; free them.
rm(tread_hse, tread_leti, tread_poly)

Density

library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# Posting activity over time: a histogram of posts in 14-day bins, filled by
# thread label (`uni`), with a dashed vertical marker at 1 September 2019.
treads %>%
  select(post_time, uni) %>%
  # Bin by calendar day rather than by timestamp.
  mutate(post_time = as.Date(post_time)) %>%
  ggplot(aes(x = post_time, group = uni, fill = uni)) +
  # geom_density(alpha = 0.4) +
  geom_histogram(binwidth = 14, alpha = 0.7) +
  theme_minimal() +
  scale_x_date(labels = scales::date_format("%Y"), date_breaks = "1 year") +
  theme(axis.text.x = element_text(angle = 0)) +
  geom_vline(
    xintercept = as.Date("2019-09-01"),
    size = 1,
    linetype = "dashed"
  ) +
  # annotate() draws the label exactly once. geom_text(aes(...)) with constant
  # aesthetics would redraw it for every row of the data, and geom_text() has
  # no `text` parameter, so the former `text = element_text(size = 14)` was
  # silently ignored (with a warning).
  annotate(
    "text",
    x = as.Date("2019-09-01"),
    y = 70,
    label = "\n1 sept",
    colour = "black",
    angle = 90
  )

Networks

# Network input: the combined table without the post text column.
treads_nw <- select(treads, -post_text)

# Only the slimmed-down copy is used from here on.
rm(treads)

library(igraph)
library(tidygraph)
## 
## Attaching package: 'tidygraph'
## The following objects are masked from 'package:quanteda':
## 
##     as.igraph, convert
## The following object is masked from 'package:igraph':
## 
##     groups
## The following object is masked from 'package:stats':
## 
##     filter
library(ggraph)

# Build the directed network for the "hse" thread: one edge per
# (user_name, answer_to_user) pair, weighted by the number of rows (posts)
# that share that pair.
nw_hse <- treads_nw %>%
  # Rows with a missing author or addressee (presumably posts that reply to
  # nobody -- confirm against the scraper) must be dropped here; otherwise
  # graph_from_data_frame() converts the NA into a vertex literally named "NA".
  filter(uni == "hse", !is.na(user_name), !is.na(answer_to_user)) %>%
  group_by(user_name, answer_to_user) %>%
  # Return an ungrouped edge list; this also silences the regrouping message.
  summarise(weight = n(), .groups = "drop") %>%
  graph_from_data_frame()
# Wrap the igraph object as a tbl_graph so dplyr-style verbs can be used on it.
nw_hse_tidy <- as_tbl_graph(nw_hse)

# Show the edge table with the heaviest edges first; the sorted graph is only
# printed, not stored.
arrange(activate(nw_hse_tidy, edges), desc(weight))
## # A tbl_graph: 204 nodes and 1069 edges
## #
## # A directed multigraph with 1 component
## #
## # Edge Data: 1,069 x 3 (active)
##    from    to weight
##   <int> <int>  <int>
## 1   150     2    173
## 2    63     2     91
## 3   122     2     74
## 4    16     2     64
## 5   177     2     43
## 6    27     2     39
## # … with 1,063 more rows
## #
## # Node Data: 204 x 1
##   name      
##   <chr>     
## 1 @ревик    
## 2 ***@***   
## 3 0123456789
## # … with 201 more rows
# Print the stored tbl_graph itself (its node and edge tables).
nw_hse_tidy
## # A tbl_graph: 204 nodes and 1069 edges
## #
## # A directed multigraph with 1 component
## #
## # Node Data: 204 x 1 (active)
##   name      
##   <chr>     
## 1 @ревик    
## 2 ***@***   
## 3 0123456789
## 4 adelina   
## 5 AGRIVA    
## 6 ak4olatan 
## # … with 198 more rows
## #
## # Edge Data: 1,069 x 3
##    from    to weight
##   <int> <int>  <int>
## 1     1    20      1
## 2     2     2     15
## 3     2    27      1
## # … with 1,066 more rows
# First look at the graph: plain edges and points, layout left to ggraph.
nw_hse_tidy %>%
  ggraph() +
  geom_edge_link() +
  geom_node_point() +
  theme_graph()
## Using `stress` as default layout

# Weighted view of the same graph with the "graphopt" layout: edge width is
# mapped to `weight`, and every node is labelled with its name.
ggraph(nw_hse_tidy, layout = "graphopt") +
  geom_node_point() +
  geom_edge_link(aes(width = weight), alpha = 0.8) +
  # Keep the heaviest edges from swamping the plot.
  scale_edge_width(range = c(0.2, 2)) +
  # repel = TRUE nudges labels apart so they do not overlap.
  geom_node_text(aes(label = name), repel = TRUE) +
  # `weight` counts posts from one user addressed to another, so the legend
  # title is "Replies" (the earlier "Letters" did not describe this data).
  labs(edge_width = "Replies") +
  theme_graph()

library(visNetwork)

# Interactive rendering of the graph through visNetwork.
visIgraph(nw_hse_tidy)