---
title: "Social Media Analytics for @CRAdvocacy"
output: html_notebook

---
```{r message=FALSE, warning=FALSE, include=FALSE, paged.print=FALSE}
library(data.table) #a library for data cleaning 
library(dplyr) #a library for data cleaning 
library(dbplyr) #a library for data cleaning 
library(RSQLite) #a library for importing data from a SQLite database
library(quanteda) #a library for quantitative textual analysis
library(ggplot2) #for plotting data
library(igraph) #for network analysis
library(lubridate)
library(scales)
library(stringr)
library(wordcloud)
library(syuzhet)
library(reshape2)
library(rCharts)
library(tidyverse)
library(tibbletime)
library(zoo)
library(quanteda)
library(rCharts)

# ---- Load tweets ------------------------------------------------------------
# NOTE(review): the absolute path below only resolves on the original author's
# machine, and src_sqlite() is flagged as deprecated in recent dplyr releases --
# consider a project-relative path and a DBI connection when this is reworked.
db <- src_sqlite("/home/weiaiwaynexu/Dropbox/Acer Laptop Sync/Data Science/Nonprofits_socialmediaanalytics/nonprofits_accounts_tweets.sqlite", create = FALSE)
# The table in the SQLite database is named "tweets"; collect() loads it into
# memory so the base-R subsetting below works on a local data frame.
df <- collect(tbl(db, sql("SELECT * FROM tweets")))
df <- df[!duplicated(df$content), ] # remove duplicates

# ---- Timestamps -------------------------------------------------------------
local_tz <- "America/New_York"
df$created_at <- with_tz(ymd_hms(df$created_at), local_tz)
# as.Date() on a POSIXct defaults to tz = "UTC", which would silently undo the
# conversion above and push late-evening tweets onto the following day. Passing
# tz keeps the calendar day in New York time.
df$created_date <- as.Date(df$created_at, tz = local_tz)
df$created_date_label <- as.factor(df$created_date)
# Month bucket, anchored on the 15th of each month.
df$Month_Yr <- ymd(paste(format(df$created_date, "%Y-%m"), "15", sep = "-"))
df$Month_Yr_label <- as.factor(df$Month_Yr)

# ---- Account and retweet filters --------------------------------------------
df <- df[!is.na(df$from_user_screen_name), ]
# Lower-case first so the exclusion below (and the "cradvocacy" filters used by
# the charts) match regardless of how the screen name was capitalised.
df$from_user_screen_name <- tolower(df$from_user_screen_name)
df <- df[df$from_user_screen_name != "weiaiwayne", ]
# %in% never returns NA, so tweets whose retweeted_status is missing are kept
# as original tweets instead of turning into all-NA rows.
df <- df[!(df$retweeted_status %in% "THIS IS A RETWEET"), ]

# ---- Daily totals per account, in long format -------------------------------
# One row per (day, account, metric); the metric name ends up in `variable`
# and its total in `value`.
df_bydate_outlet <- df %>%
  group_by(created_date_label, from_user_screen_name) %>%
  summarise(
    sum_rt = sum(retweet_count, na.rm = TRUE),
    sum_fav = sum(favorite_count, na.rm = TRUE),
    sum_mention = sum(entities_mentions_count, na.rm = TRUE),
    sum_hashtag = sum(entities_hashtags_count, na.rm = TRUE),
    tweet_count = n_distinct(content)
  ) %>%
  ungroup() %>%
  # Namespace-qualified because data.table also exports a melt().
  reshape2::melt(id.vars = c("created_date_label", "from_user_screen_name"))

df_bydate_outlet$created_date_label <-
  as.Date(as.character(df_bydate_outlet$created_date_label))
```

```{r echo=FALSE, message=FALSE, warning=FALSE, results="asis", comment=NA}
# Chart 1: number of tweets per day posted by @CRAdvocacy.
#
# results="asis" / comment=NA are set in the chunk header because
# knitr::opts_chunk$set() only takes effect for chunks that come AFTER the one
# calling it; without them the HTML written by h1$print() is rendered as
# commented text output rather than as a chart.
options(RCHART_WIDTH = 1000, RCHART_HEIGHT = 700)
# Kept so that later chunks keep inheriting these defaults.
knitr::opts_chunk$set(comment = NA, results = "asis", tidy = FALSE, message = FALSE)
df_bydate_outlet$date <- as.character(df_bydate_outlet$created_date_label)

# which() drops NA comparisons so they cannot inject all-NA rows into the data.
tweet_count_rows <- which(
  df_bydate_outlet$variable == "tweet_count" &
    df_bydate_outlet$from_user_screen_name == "cradvocacy"
)

h1 <- hPlot(
  x = "date", y = "value",
  data = df_bydate_outlet[tweet_count_rows, ],
  type = "line", group = "from_user_screen_name"
)

h1$yAxis(list(list(title = list(text = 'Tweet count per day'))))
h1$title(text='Tweet count @CRAdvocacy')
h1$set(parseTime = FALSE)
h1$print("chart1",include_assets = TRUE)
```
```{r echo=FALSE, message=FALSE, warning=FALSE, results="asis", comment=NA}
# Chart 2: retweets per day received by @CRAdvocacy tweets.
#
# results="asis" / comment=NA are stated in the header so this chunk renders
# its chart correctly without depending on options set by an earlier chunk.
options(RCHART_WIDTH = 1000, RCHART_HEIGHT = 700)
# Kept so that later chunks keep inheriting these defaults.
knitr::opts_chunk$set(comment = NA, results = "asis", tidy = FALSE, message = FALSE)
df_bydate_outlet$date <- as.character(df_bydate_outlet$created_date_label)

# which() drops NA comparisons so they cannot inject all-NA rows into the data.
retweet_rows <- which(
  df_bydate_outlet$variable == "sum_rt" &
    df_bydate_outlet$from_user_screen_name == "cradvocacy"
)

h2 <- hPlot(
  x = "date", y = "value",
  data = df_bydate_outlet[retweet_rows, ],
  type = "line", group = "from_user_screen_name"
)

h2$colors('rgba(212, 175, 232, 1)','rgba(223, 83, 83, .5)','rgba(60, 179, 113, .5)')

h2$yAxis(list(list(title = list(text = 'Retweet count per day'))))
h2$title(text='Retweet count @CRAdvocacy')
h2$set(parseTime = FALSE)
h2$print("chart2",include_assets = TRUE)
```

```{r echo=FALSE, message=FALSE, warning=FALSE, results="asis", comment=NA}
# Chart 3: favorites per day received by @CRAdvocacy tweets.
#
# results="asis" / comment=NA are stated in the header so this chunk renders
# its chart correctly without depending on options set by an earlier chunk.
options(RCHART_WIDTH = 1000, RCHART_HEIGHT = 700)
# Kept so that later chunks keep inheriting these defaults.
knitr::opts_chunk$set(comment = NA, results = "asis", tidy = FALSE, message = FALSE)
df_bydate_outlet$date <- as.character(df_bydate_outlet$created_date_label)

# which() drops NA comparisons so they cannot inject all-NA rows into the data.
favorite_rows <- which(
  df_bydate_outlet$variable == "sum_fav" &
    df_bydate_outlet$from_user_screen_name == "cradvocacy"
)

h3 <- hPlot(
  x = "date", y = "value",
  data = df_bydate_outlet[favorite_rows, ],
  type = "line", group = "from_user_screen_name"
)

h3$colors('rgba(223, 83, 83, .5)', 'rgba(119, 152, 191, .5)', 'rgba(60, 179, 113, .5)')

h3$yAxis(list(list(title = list(text = 'Favorite count per day'))))
h3$title(text='Favorite count @CRAdvocacy')
h3$set(parseTime = FALSE)
h3$print("chart3",include_assets = TRUE)
```
```{r echo=FALSE, message=FALSE, warning=FALSE, results="asis", comment=NA}
# Chart 4: number of @-mentions per day contained in @CRAdvocacy tweets
# (daily sum of entities_mentions_count).
#
# results="asis" / comment=NA are stated in the header so this chunk renders
# its chart correctly without depending on options set by an earlier chunk.
options(RCHART_WIDTH = 1000, RCHART_HEIGHT = 700)
# Kept so that later chunks keep inheriting these defaults.
knitr::opts_chunk$set(comment = NA, results = "asis", tidy = FALSE, message = FALSE)
df_bydate_outlet$date <- as.character(df_bydate_outlet$created_date_label)

# which() drops NA comparisons so they cannot inject all-NA rows into the data.
mention_rows <- which(
  df_bydate_outlet$variable == "sum_mention" &
    df_bydate_outlet$from_user_screen_name == "cradvocacy"
)

h4 <- hPlot(
  x = "date", y = "value",
  data = df_bydate_outlet[mention_rows, ],
  type = "line", group = "from_user_screen_name"
)

h4$colors('rgba(60, 179, 113, .5)','rgba(223, 83, 83, .5)', 'rgba(119, 152, 191, .5)')

h4$yAxis(list(list(title = list(text = 'Twitter mention count per day'))))
h4$title(text='Mention count @CRAdvocacy')
h4$set(parseTime = FALSE)
h4$print("chart4",include_assets = TRUE)
```
```{r echo=FALSE, message=FALSE, warning=FALSE, results="asis", comment=NA}
# Chart 5: number of hashtags per day contained in @CRAdvocacy tweets
# (daily sum of entities_hashtags_count).
#
# results="asis" / comment=NA are stated in the header so this chunk renders
# its chart correctly without depending on options set by an earlier chunk.
options(RCHART_WIDTH = 1000, RCHART_HEIGHT = 700)
# Kept so that later chunks keep inheriting these defaults.
knitr::opts_chunk$set(comment = NA, results = "asis", tidy = FALSE, message = FALSE)
df_bydate_outlet$date <- as.character(df_bydate_outlet$created_date_label)

# which() drops NA comparisons so they cannot inject all-NA rows into the data.
hashtag_rows <- which(
  df_bydate_outlet$variable == "sum_hashtag" &
    df_bydate_outlet$from_user_screen_name == "cradvocacy"
)

h5 <- hPlot(
  x = "date", y = "value",
  data = df_bydate_outlet[hashtag_rows, ],
  type = "line", group = "from_user_screen_name"
)

h5$colors('rgba(119, 152, 191, .7)','rgba(223, 83, 83, .5)', 'rgba(60, 179, 113, .5)')

h5$yAxis(list(list(title = list(text = 'Hashtag count per day'))))
h5$title(text='Hashtag count @CRAdvocacy')
h5$set(parseTime = FALSE)
h5$print("chart5",include_assets = TRUE)
```

