library(readr)
library("plyr")
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

User count by gender and country

# Read the tweet export. readr guesses the column types and reports them in
# the output reproduced below.
Trump <- read_csv("Trump.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_integer(),
##   MESSAGE_BODY = col_character(),
##   MESSAGE_COUNTRY = col_character(),
##   MESSAGE_FAVORITES_COUNT = col_integer(),
##   MESSAGE_LOCATION = col_character(),
##   MESSAGE_LOCATION_DISPLAY_NAME = col_character(),
##   MESSAGE_POSTED_TIME = col_datetime(format = ""),
##   MESSAGE_RETWEET_COUNT = col_integer(),
##   USER_CITY = col_character(),
##   USER_COUNTRY = col_character(),
##   USER_DISPLAY_NAME = col_character(),
##   USER_FOLLOWERS_COUNT = col_integer(),
##   USER_FRIENDS_COUNT = col_integer(),
##   USER_GENDER = col_character(),
##   USER_LOCATION_DISPLAY_NAME = col_character(),
##   USER_SCREEN_NAME = col_character()
## )
# Lower-cased copy of the user's country so the matching below is
# case-insensitive.
Trump$country <- tolower(Trump$USER_COUNTRY)

# Restrict the data to the countries being compared (rows with a missing
# country are dropped by subset()).
subdf <- subset(
  Trump,
  country %in% c("united states", "united kingdom", "india", "")
)

# Long-format counts: one row per (country, gender) pair with its frequency.
followerCountry <- data.frame(
  table(country = subdf$country, USER_GENDER = subdf$USER_GENDER)
)

# Print the same counts as a country x gender contingency table.
xtabs(Freq ~ country + USER_GENDER, data = followerCountry)
##                 USER_GENDER
## country          female male unknown
##   india               2    7       1
##   united kingdom     13   55      52
##   united states     487 1011    1166

# Wide format: one row per country, one Freq.<gender> column per gender.
tab <- reshape(
  followerCountry,
  direction = "wide",
  timevar = "USER_GENDER",
  idvar = "country"
)

# Grouped bar chart of female vs. male user counts per country; the
# "unknown" gender column is not plotted.
p <- plot_ly(
  tab,
  x = ~country,
  y = ~Freq.female,
  type = "bar",
  name = "Female"
)
p <- add_trace(p, y = ~Freq.male, name = "Male")
p <- layout(p, yaxis = list(title = "Count"), barmode = "group")

p

User posting day of the week by gender

# Day of the week on which each message was posted, as an English day name.
# The name is looked up from the numeric weekday (0 = Sunday) rather than
# taken from weekdays(), whose output follows the session locale and would
# not match the Freq.<Day> columns referenced by the plot below.
day_names <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday",
               "Friday", "Saturday")
Trump$days <- day_names[as.POSIXlt(Trump$MESSAGE_POSTED_TIME)$wday + 1L]

# NOTE: despite its name, `city` holds the lower-cased user *country*; the
# column name is kept so that any later code using it keeps working.
Trump$city <- tolower(Trump$USER_COUNTRY)

# Get subset of the data to explore
subdf <- subset(Trump, city %in% c("united states", "united kingdom"))

# Count messages per gender and weekday. The counts are taken from the
# subset built above (previously the unfiltered data was tabulated, so the
# subset had no effect). Using a factor with all seven levels guarantees a
# Freq.<Day> column for every weekday, even one with zero messages.
week_order <- day_names[c(2:7, 1)]
dfrm <- data.frame(
  table(
    USER_GENDER = subdf$USER_GENDER,
    days = factor(subdf$days, levels = week_order)
  )
)

# Wide format: one row per gender, one Freq.<Day> column per weekday.
genderDays <- reshape(
  dfrm,
  direction = "wide",
  timevar = "days",
  idvar = "USER_GENDER"
)

# Grouped bar chart: one group per gender, one bar per weekday.
p <- plot_ly(
  genderDays,
  x = ~USER_GENDER,
  y = ~Freq.Monday,
  type = "bar",
  name = "Monday"
) %>%
  add_trace(y = ~Freq.Tuesday, name = "Tuesday") %>%
  add_trace(y = ~Freq.Wednesday, name = "Wednesday") %>%
  add_trace(y = ~Freq.Thursday, name = "Thursday") %>%
  add_trace(y = ~Freq.Friday, name = "Friday") %>%
  add_trace(y = ~Freq.Saturday, name = "Saturday") %>%
  add_trace(y = ~Freq.Sunday, name = "Sunday") %>%
  layout(yaxis = list(title = "Count"), barmode = "group")

p