1. read the files

this file is filtered by ngos and corporate usernames

a total of 2479 accounts, composed of 1567 ngos and 906 corporate accounts (note: 1567 + 906 = 2473, not 2479 - counts to be reconciled)

Q: there are quite a few duplicate business accounts - maybe there were parent-offspring relationships?

# Load the tab-separated actor list. screen_name and type are read as
# character columns.
actors <- read.delim(
  "actors_combined.csv",
  colClasses = c(screen_name = "character", type = "character")
)

# Load the tab-separated tweet table. The screen-name columns and the tweet
# id are read as character.
# NOTE(review): the default quote character (") is still active; tweet text
# containing unbalanced quotes could merge rows - confirm the row count.
tweets <- read.delim(
  "filtered_later_april_half.csv",
  colClasses = c(
    user_screen_name = "character",
    retweet_screen_name = "character",
    in_reply_to_screen_name = "character",
    id = "character"
  )
)

2. remove retweets

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only rows whose retweet_screen_name is the empty string
# (presumably the non-retweets; rows where it is NA are dropped too).
none_rts <- filter(tweets, retweet_screen_name == "")

3. extract mentions

library(dplyr)
library(tm)
## Loading required package: NLP
library(tidyr)
library(stringr)

# Pull every "@handle" token out of each tweet's text into a list column,
# then expand it so there is one row per (tweet, mention) pair.
# `text` is referenced as a column via data masking instead of
# `none_rts$text`, so the expression always follows the piped data.
# With unnest()'s default keep_empty = FALSE (tidyr >= 1.0), tweets without
# any mention produce no rows.
none_rts <- none_rts %>%
  mutate(mention = str_extract_all(text, "@\\w+")) %>%
  unnest(mention)

4. remove @, then select the corresponding columns

# Strip the "@" from each extracted mention to get the bare handle.
none_rts <- mutate(
  none_rts,
  mentioned_user = gsub("@", "", mention, fixed = TRUE)
)
# v2 keeps the tweet text next to the sender/mentioned pair; v3 drops it.
none_rts_v2 <- select(none_rts, user_screen_name, text, mentioned_user)
none_rts_v3 <- select(none_rts_v2, -text)

4. keep only mentions of other actors in the list

# Keep only mentions whose target appears in the actor list.
# `mentioned_user` is referenced as a column via data masking instead of
# `none_rts_v3$mentioned_user`, so the condition follows the piped data.
# NOTE(review): %in% is an exact, case-sensitive match. If handles in tweet
# text can differ in case from actors$screen_name, valid mentions are dropped
# here - confirm, given how few edges result.
mentions <- none_rts_v3 %>%
  filter(mentioned_user %in% actors$screen_name)

4. make the extracted mentions into an edgelist

# Edge list: sender in the first column, mentioned account in the second.
mentions_v2 <- select(mentions, user_screen_name, mentioned_user)

5. convert the edgelist to an igraph object

library(igraph)
## 
## Attaching package: 'igraph'
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
# Build a directed mention network. Every row of `actors` becomes a vertex,
# so actors without any mention edge are still present as isolates.
# graph_from_data_frame() takes vertex names from the first column of
# `vertices` - presumably screen_name; confirm the column order of `actors`.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
mt_net <- graph_from_data_frame(mentions_v2, directed = TRUE, vertices = actors)

6. simplify net

# Collapse repeated edges and drop self-loops (simplify()'s defaults, spelled
# out here for readability).
mt_net <- simplify(mt_net, remove.multiple = TRUE, remove.loops = TRUE)

7. how many nodes and connections ?

# Number of vertices in the network.
gorder(mt_net)
## [1] 2468
# Number of edges in the network.
gsize(mt_net)
## [1] 15

8. how many isolates

# Store each vertex's total degree (in + out) as a vertex attribute, then
# count the vertices with no connections at all.
mt_net <- set_vertex_attr(
  mt_net,
  name = "degree",
  value = degree(mt_net, mode = "all")
)
sum(vertex_attr(mt_net, "degree") == 0)
## [1] 2444

9. plot the network with everyone in the actor list

# Draw the full network: tiny unlabeled vertices and thin edges.
plot(
  mt_net,
  vertex.size = 1,
  vertex.label = NA,
  edge.width = 0.07,
  edge.arrow.size = 0.04
)

10. plot the network with only those who have at least one connection

# Vertices with total degree 0 (degree()'s default mode is "all").
mt_iso <- V(mt_net)[degree(mt_net) == 0]
# Remove the isolates. delete_vertices() replaces the deprecated
# delete.vertices() spelling.
mt_net_noniso <- delete_vertices(mt_net, mt_iso)
plot(mt_net_noniso, edge.arrow.size = 0.1)

11. color the nodes based on the type of actors

# Colour vertices by actor type: "ngos" in yellow, every other type in pink.
mt_net_noniso <- set_vertex_attr(
  mt_net_noniso,
  name = "color",
  value = ifelse(vertex_attr(mt_net_noniso, "type") == "ngos", "yellow", "pink")
)
plot(
  mt_net_noniso,
  vertex.color = vertex_attr(mt_net_noniso, "color"),
  edge.arrow.size = 0.1
)