I am reading a dataset collected from Twitter for a sentiment analysis of demonetisation, on the occasion of the 1st anniversary of the ban on 500- and 1000-rupee notes.

I am using references from Data Byte - NIT Trichy’s project and from Kaggle (for the dataset and tutorials).

# Load the demonetization tweets CSV export into a data frame.
demonet_tweet <- read.csv(
  file = "C:\\Users\\user\\Desktop\\demonetization-tweets.csv"
)

To handle this complex dataset, I will be using several packages, listed below:

library(readr) 
library(dplyr) 
## Warning: package 'dplyr' was built under R version 3.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2) 
library(ggvis) 
## 
## Attaching package: 'ggvis'
## The following object is masked from 'package:ggplot2':
## 
##     resolution
library(gdata) 
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
## 
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
## 
## Attaching package: 'gdata'
## The following objects are masked from 'package:dplyr':
## 
##     combine, first, last
## The following object is masked from 'package:stats':
## 
##     nobs
## The following object is masked from 'package:utils':
## 
##     object.size
## The following object is masked from 'package:base':
## 
##     startsWith
library(lubridate) 
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(formattable) 
## Warning: package 'formattable' was built under R version 3.4.3

To take a look at the dataset:

# Compact, column-by-column preview of the data frame (glimpse() is from dplyr).
dplyr::glimpse(demonet_tweet)
## Observations: 8,000
## Variables: 15
## $ S.no          <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1...
## $ text          <fctr> RT @rssurjewala: Critical question: Was PayTM i...
## $ favorited     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,...
## $ favoriteCount <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...
## $ replyToSN     <fctr> NA, NA, NA, NA, NA, DerekScissors1, NA, NA, NA,...
## $ created       <fctr> 23-11-2016 18:40, 23-11-2016 18:40, 23-11-2016 ...
## $ truncated     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,...
## $ replyToSID    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ id            <dbl> 8.01496e+17, 8.01496e+17, 8.01496e+17, 8.01496e+...
## $ replyToUID    <dbl> NA, NA, NA, NA, NA, 2586266100, NA, NA, NA, NA, ...
## $ statusSource  <fctr> <a href="http://twitter.com/download/android" r...
## $ screenName    <fctr> HASHTAGFARZIWAL, PRAMODKAUSHIK9, rahulja1303494...
## $ retweetCount  <int> 331, 66, 12, 338, 120, 0, 637, 112, 1, 0, 1, 120...
## $ isRetweet     <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE,...
## $ retweeted     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,...

Next, I analyse the tweets by user name:

# Number of tweets per screen name, ordered from most to least frequent.
demonet_tweet_Name <- arrange(
  summarise(group_by(demonet_tweet, screenName), freq = n()),
  desc(freq)
)
# Largest and smallest per-screen-name tweet counts.
summarise(
  demonet_tweet_Name,
  maxTweets = max(freq),
  minTweets = min(freq)
)
## # A tibble: 1 x 2
##   maxTweets minTweets
##       <dbl>     <dbl>
## 1        19         1
# Density curve of the per-screen-name tweet counts, with enlarged axis
# tick labels and axis titles.
ggplot(demonet_tweet_Name, aes(x = freq)) +
  geom_density() +
  theme(
    axis.text = element_text(size = 16),
    axis.title = element_text(size = 20)
  )

As the data is text-based, other kinds of EDA could not be done.

# Show the first six rows of the raw tweet data.
head(demonet_tweet, n = 6L)
##   S.no
## 1    1
## 2    2
## 3    3
## 4    4
## 5    5
## 6    6
##                                                                                                                                               text
## 1 RT @rssurjewala: Critical question: Was PayTM informed about #Demonetization edict by PM? It's clearly fishy and requires full disclosure &amp;<U+0085>
## 2                                                                               RT @Hemant_80: Did you vote on #Demonetization on Modi survey app?
## 3   RT @roshankar: Former FinSec, RBI Dy Governor, CBDT Chair + Harvard Professor lambaste #Demonetization.\n\nIf not for Aam Aadmi, listen to th<U+0085>
## 4     RT @ANI_news: Gurugram (Haryana): Post office employees provide cash exchange to patients in hospitals #demonetization https://t.co/uGMxUP9<U+0085>
## 5                                      RT @satishacharya: Reddy Wedding! @mail_today cartoon #demonetization #ReddyWedding https://t.co/u7gLNrq31F
## 6                        @DerekScissors1: India<U+0092>s #demonetization: #Blackmoney a symptom, not the disease https://t.co/HSl6Ihj0Qe via @ambazaarmag
##   favorited favoriteCount      replyToSN          created truncated
## 1     FALSE             0           <NA> 23-11-2016 18:40     FALSE
## 2     FALSE             0           <NA> 23-11-2016 18:40     FALSE
## 3     FALSE             0           <NA> 23-11-2016 18:40     FALSE
## 4     FALSE             0           <NA> 23-11-2016 18:39     FALSE
## 5     FALSE             0           <NA> 23-11-2016 18:39     FALSE
## 6     FALSE             0 DerekScissors1 23-11-2016 18:39     FALSE
##   replyToSID          id replyToUID
## 1         NA 8.01496e+17         NA
## 2         NA 8.01496e+17         NA
## 3         NA 8.01496e+17         NA
## 4         NA 8.01496e+17         NA
## 5         NA 8.01495e+17         NA
## 6         NA 8.01495e+17 2586266100
##                                                                           statusSource
## 1 <a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>
## 2 <a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>
## 3 <a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>
## 4 <a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>
## 5                        <a href="http://cpimharyana.com" rel="nofollow">CPIMBadli</a>
## 6                   <a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>
##        screenName retweetCount isRetweet retweeted
## 1 HASHTAGFARZIWAL          331      TRUE     FALSE
## 2  PRAMODKAUSHIK9           66      TRUE     FALSE
## 3 rahulja13034944           12      TRUE     FALSE
## 4       deeptiyvd          338      TRUE     FALSE
## 5       CPIMBadli          120      TRUE     FALSE
## 6     ambazaarmag            0     FALSE     FALSE
# Row and column counts of the tweet data frame.
c(nrow(demonet_tweet), ncol(demonet_tweet))
## [1] 8000   15
# Per-column summary: quantiles for numeric columns, level counts for
# factors, and TRUE/FALSE tallies for logicals.
summary(object = demonet_tweet)
##       S.no     
##  Min.   :   1  
##  1st Qu.:2001  
##  Median :4000  
##  Mean   :4000  
##  3rd Qu.:6000  
##  Max.   :8000  
##                
##                                                                                                                                                                                                                                                                                               text     
##  RT @gauravcsawant: Rs 40 lakh looted from a bank in Kishtwar in J&amp;K. Third such incident since #demonetization. That's how terrorists have<U+0085>                                                                                                                                                : 541  
##  RT @DrKumarVishwas: And the Oscar goes to "Mr.<U+092D><U+093E><U+0935><U+0941><U+0915>" <ed><U+00A0><U+00BD><ed><U+00B8><U+00A9><ed><U+00A0><U+00BD><ed><U+00B8><U+00A5><ed><U+00A0><U+00BD><ed><U+00B8><U+00A2><ed><U+00A0><U+00BD><ed><U+00B8><U+00AD>#demonetization https://t.co/ObQrhlNSL6: 350  
##  RT @rssurjewala: Critical question: Was PayTM informed about #Demonetization edict by PM? It's clearly fishy and requires full disclosure &amp;<U+0085>                                                                                                                                               : 276  
##  RT @ModiBharosa: Putting Nation over Party Politics #nitishkumar supports PM @narendramodi on #Demonetization https://t.co/UodwXdPMmG                                                                                                                                                          : 253  
##  RT @ModiBharosa: Huge support for PM @narendramodi <U+0092>s #demonetization Move Across the Nation \n80-86% people back demonetization: C-voter su<U+0085>                                                                                                                                                  : 246  
##  RT @ashu3page: Man ends life over fund shortage ahead of daughter<U+0092>s wedding in Gujarat. #Demonetization \nhttps://t.co/DGAI5cf05y \nhttps://t<U+0085>                                                                                                                                                 : 119  
##  (Other)                                                                                                                                                                                                                                                                                        :6215  
##  favorited       favoriteCount               replyToSN   
##  Mode :logical   Min.   :  0.0000   narendramodi  :  23  
##  FALSE:8000      1st Qu.:  0.0000   centerofright :  13  
##                  Median :  0.0000   ArvindKejriwal:  11  
##                  Mean   :  0.7181   PMOIndia      :   8  
##                  3rd Qu.:  0.0000   CNNnews18     :   5  
##                  Max.   :341.0000   (Other)       : 256  
##                                     NA's          :7684  
##              created     truncated         replyToSID       
##  23-11-2016 09:57:  48   Mode :logical   Min.   :6.458e+17  
##  23-11-2016 06:09:  31   FALSE:7756      1st Qu.:8.011e+17  
##  23-11-2016 06:03:  30   TRUE :244       Median :8.013e+17  
##  23-11-2016 09:53:  25                   Mean   :8.004e+17  
##  22-11-2016 12:00:  22                   3rd Qu.:8.013e+17  
##  23-11-2016 06:25:  22                   Max.   :8.015e+17  
##  (Other)         :7822                   NA's   :7774       
##        id              replyToUID       
##  Min.   :8.010e+17   Min.   :2.468e+06  
##  1st Qu.:8.011e+17   1st Qu.:3.583e+07  
##  Median :8.013e+17   Median :1.434e+08  
##  Mean   :8.013e+17   Mean   :2.129e+16  
##  3rd Qu.:8.014e+17   3rd Qu.:1.207e+09  
##  Max.   :8.015e+17   Max.   :8.011e+17  
##                      NA's   :7684       
##                                                                                statusSource 
##  <a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>:4247  
##  <a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>                  :1407  
##  <a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>  :1098  
##  <a href="https://about.twitter.com/products/tweetdeck" rel="nofollow">TweetDeck</a> : 260  
##  <a href="https://mobile.twitter.com" rel="nofollow">Mobile Web (M5)</a>             : 178  
##  <a href="http://www.facebook.com/twitter" rel="nofollow">Facebook</a>               : 109  
##  (Other)                                                                             : 701  
##          screenName    retweetCount    isRetweet       retweeted      
##  DipendraDipzo:  19   Min.   :   0.0   Mode :logical   Mode :logical  
##  AmiKanu      :  18   1st Qu.:   4.0   FALSE:1776      FALSE:8000     
##  chacha_ninja :  16   Median :  41.0   TRUE :6224                     
##  MarathaBaan  :  16   Mean   : 167.3                                  
##  centerofright:  15   3rd Qu.: 221.0                                  
##  monk_razr    :  15   Max.   :1944.0                                  
##  (Other)      :7901