Install required packages

# install.packages("readr")
# install.packages("plyr")
# install.packages("stringr")
# install.packages("stringi")
# install.packages("magrittr")
# install.packages("dplyr")
# install.packages("plotly")

library(readr)
## Warning: package 'readr' was built under R version 3.4.3
library("plyr")
## Warning: package 'plyr' was built under R version 3.4.3
library(plotly)
## Warning: package 'plotly' was built under R version 3.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

User gender distribution by user location

# Load the tweet export into `Trump`. The path below is an absolute
# location on the original author's machine.
Trump <- read_csv(
  file = "D:\\New folder (7)\\Trump(1).csv"
)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_integer(),
##   MESSAGE_BODY = col_character(),
##   MESSAGE_COUNTRY = col_character(),
##   MESSAGE_FAVORITES_COUNT = col_integer(),
##   MESSAGE_LOCATION = col_character(),
##   MESSAGE_LOCATION_DISPLAY_NAME = col_character(),
##   MESSAGE_POSTED_TIME = col_datetime(format = ""),
##   MESSAGE_RETWEET_COUNT = col_integer(),
##   USER_CITY = col_character(),
##   USER_COUNTRY = col_character(),
##   USER_DISPLAY_NAME = col_character(),
##   USER_FOLLOWERS_COUNT = col_integer(),
##   USER_FRIENDS_COUNT = col_integer(),
##   USER_GENDER = col_character(),
##   USER_LOCATION_DISPLAY_NAME = col_character(),
##   USER_SCREEN_NAME = col_character()
## )
# Print a per-column overview of the loaded data.
summary(object = Trump)
##        X1       MESSAGE_BODY       MESSAGE_COUNTRY   
##  Min.   :   1   Length:4955        Length:4955       
##  1st Qu.:1240   Class :character   Class :character  
##  Median :2478   Mode  :character   Mode  :character  
##  Mean   :2478                                        
##  3rd Qu.:3716                                        
##  Max.   :4955                                        
##  MESSAGE_FAVORITES_COUNT MESSAGE_LOCATION   MESSAGE_LOCATION_DISPLAY_NAME
##  Min.   :   0.000        Length:4955        Length:4955                  
##  1st Qu.:   0.000        Class :character   Class :character             
##  Median :   0.000        Mode  :character   Mode  :character             
##  Mean   :   2.442                                                        
##  3rd Qu.:   0.000                                                        
##  Max.   :1924.000                                                        
##  MESSAGE_POSTED_TIME           MESSAGE_RETWEET_COUNT  USER_CITY        
##  Min.   :2014-11-05 13:42:05   Min.   :  0.00        Length:4955       
##  1st Qu.:2015-09-17 20:30:05   1st Qu.:  0.00        Class :character  
##  Median :2016-02-08 07:47:24   Median :  1.00        Mode  :character  
##  Mean   :2015-12-21 06:49:03   Mean   : 41.34                          
##  3rd Qu.:2016-03-04 02:39:23   3rd Qu.: 13.00                          
##  Max.   :2016-03-30 02:45:13   Max.   :871.00                          
##  USER_COUNTRY       USER_DISPLAY_NAME  USER_FOLLOWERS_COUNT
##  Length:4955        Length:4955        Min.   :      0     
##  Class :character   Class :character   1st Qu.:    130     
##  Mode  :character   Mode  :character   Median :    534     
##                                        Mean   :  12514     
##                                        3rd Qu.:   2046     
##                                        Max.   :4714925     
##  USER_FRIENDS_COUNT USER_GENDER        USER_LOCATION_DISPLAY_NAME
##  Min.   :     0     Length:4955        Length:4955               
##  1st Qu.:   184     Class :character   Class :character          
##  Median :   592     Mode  :character   Mode  :character          
##  Mean   :  2088                                                  
##  3rd Qu.:  1714                                                  
##  Max.   :282971                                                  
##  USER_SCREEN_NAME  
##  Length:4955       
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Lower-case the free-text user location so the comparison below is
# case-insensitive.
Trump$location <- tolower(Trump$USER_LOCATION_DISPLAY_NAME)

# Keep only the rows for the two locations being compared.
location <- subset(
  Trump,
  location %in% c("united states", "brooklyn, ny")
)

# Long-format counts: one row per (location, gender) combination.
genderlocation <- data.frame(
  table(location[, c("location", "USER_GENDER")])
)
xtabs(Freq ~ location + USER_GENDER, data = genderlocation)
##                USER_GENDER
## location        female male unknown
##   brooklyn, ny       0    4       6
##   united states     10   18      92
# Wide format: one row per location with a Freq.<gender> column for each
# gender value.
tab <- reshape(
  genderlocation,
  idvar = "location",
  timevar = "USER_GENDER",
  direction = "wide"
)

# Grouped bar chart of the female and male counts per location.
p <- plot_ly(
  tab,
  x = ~location,
  y = ~Freq.female,
  type = "bar",
  name = "Female"
) %>%
  add_trace(y = ~Freq.male, name = "Male") %>%
  layout(
    yaxis = list(title = "Count"),
    barmode = "group"
  )

p
## Warning: package 'bindrcpp' was built under R version 3.4.3

User gender distribution by city

# Lower-case the user city so the comparison below is case-insensitive.
Trump$city <- tolower(Trump$USER_CITY)

# Keep only the rows for the two cities being compared.
subdf <- subset(
  Trump,
  city %in% c("new york city", "brooklyn")
)

# Long-format counts: one row per (city, gender) combination.
genderCity <- data.frame(
  table(subdf[, c("city", "USER_GENDER")])
)
xtabs(Freq ~ city + USER_GENDER, data = genderCity)
##                USER_GENDER
## city            female male unknown
##   brooklyn           0   10       6
##   new york city      8   45      44
# Wide format: one row per city with a Freq.<gender> column for each
# gender value.
tab <- reshape(
  genderCity,
  idvar = "city",
  timevar = "USER_GENDER",
  direction = "wide"
)

# Grouped bar chart of the female and male counts per city.
p <- plot_ly(
  tab,
  x = ~city,
  y = ~Freq.female,
  type = "bar",
  name = "Female"
) %>%
  add_trace(y = ~Freq.male, name = "Male") %>%
  layout(
    yaxis = list(title = "Count"),
    barmode = "group"
  )

p

User posting time by gender

# Weekday of each post, tabulated against user gender and plotted as grouped
# bars (one bar per weekday within each gender).
#
# weekdays() returns day names in the session locale, while the plot below
# refers to the English column names Freq.Monday ... Freq.Sunday. To keep
# those columns stable, English names are derived from the numeric `wday`
# field of POSIXlt (0 = Sunday ... 6 = Saturday) instead.
week_order <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
wday_index <- as.POSIXlt(Trump$MESSAGE_POSTED_TIME)$wday
Trump$days <- c(
  "Sunday", "Monday", "Tuesday", "Wednesday",
  "Thursday", "Friday", "Saturday"
)[wday_index + 1L]

# Tabulate against a factor carrying all seven levels so that a weekday with
# no posts still yields a zero-count Freq.<day> column after reshaping.
# Trump$days itself stays a character column.
dfrm <- data.frame(
  table(
    USER_GENDER = Trump$USER_GENDER,
    days = factor(Trump$days, levels = week_order)
  )
)

# Wide format: one row per gender with a Freq.<weekday> column per weekday.
genderDays <- reshape(
  dfrm,
  direction = "wide",
  timevar = "days",
  idvar = "USER_GENDER"
)

p <- plot_ly(
  genderDays,
  x = ~USER_GENDER,
  y = ~Freq.Monday,
  type = "bar",
  name = "Monday"
) %>%
  add_trace(y = ~Freq.Tuesday, name = "Tuesday") %>%
  add_trace(y = ~Freq.Wednesday, name = "Wednesday") %>%
  add_trace(y = ~Freq.Thursday, name = "Thursday") %>%
  add_trace(y = ~Freq.Friday, name = "Friday") %>%
  add_trace(y = ~Freq.Saturday, name = "Saturday") %>%
  add_trace(y = ~Freq.Sunday, name = "Sunday") %>%
  layout(yaxis = list(title = "Count"), barmode = "group")

p