library(SentimentAnalysis)
##
## Attaching package: 'SentimentAnalysis'
## The following object is masked from 'package:base':
##
## write
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(rtweet)
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:purrr':
##
## flatten
Getting AAPL data
##Creating the connection with rtweet
create_token(
  app = "607",
  consumer_key = "KmfIjPiLdO1mZdam7fgEA9XXf",
  consumer_secret = "trP9eQKvDjzzERU2oyRHcJKqUlv3R8hq0ABWRCYy5V3L23lGmY",
  access_token = "480323958-6kQk1ST8tm0CA2qLShYtnozd2Y3rJSsePg5ejqci",
  access_secret = "Zj4q5qqm7pcBpQ4ZOT2TaFBIcZeJ2g1b3bDE3SgMrS1rY"
)
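Note that the call above embeds the API credentials directly in the script. A safer pattern, sketched below with illustrative environment-variable names (not part of the original code), is to keep the secrets out of the document entirely:
#Sketch: read the Twitter credentials from environment variables (names are placeholders)
create_token(
  app = "607",
  consumer_key = Sys.getenv("TWITTER_CONSUMER_KEY"),
  consumer_secret = Sys.getenv("TWITTER_CONSUMER_SECRET"),
  access_token = Sys.getenv("TWITTER_ACCESS_TOKEN"),
  access_secret = Sys.getenv("TWITTER_ACCESS_SECRET")
)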
dates <- seq.Date(from = as.Date("2019-11-07"), to = as.Date("2019-12-06"), by = 1)
##Apple tweets
df_aapl <- data.frame()
for (i in seq_along(dates)) {
  df_temp <- search_tweets("@$AAPL OR @AAPLNews", n = 10000,
                           lang = "en", include_rts = FALSE,
                           retryonratelimit = TRUE)
  df_aapl <- rbind(df_aapl, df_temp)
}
aapl <- df_aapl
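As written, every pass through the loop above issues the same query, since the loop index is never used. If the intent was one search per day, the date can be folded into the query with Twitter's since:/until: search operators, as in the sketch below (keep in mind the standard search endpoint only reaches back roughly a week, so older dates in the range would return nothing):
#Sketch: restrict each search to a single day using since:/until: operators
df_aapl <- data.frame()
for (i in seq_along(dates)) {
  q <- sprintf("@$AAPL OR @AAPLNews since:%s until:%s", dates[i], dates[i] + 1)
  df_temp <- search_tweets(q, n = 10000, lang = "en",
                           include_rts = FALSE, retryonratelimit = TRUE)
  df_aapl <- rbind(df_aapl, df_temp)
}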
#Encode to utf8
aapl$text <- enc2utf8(aapl$text)
#Extract the calendar date (YYYY-MM-DD) from the created_at timestamp
date <- str_extract(aapl$created_at, "\\d{4}-\\d{2}-\\d{2}")
aapl$Date <- as.Date(date)
#Keep only the columns we need
aapl <- subset(aapl, select = c(Date, text))
#Write as a CSV file; this file has been uploaded to GitHub and is read back in below
write.csv(aapl, file = "aapl.csv", row.names = FALSE, fileEncoding = "UTF-8")
Getting price data
#Set the dates for which we need the prices
start <- as.Date("2019-11-27")
end <- as.Date("2019-12-06")
getSymbols("AAPL", src = "yahoo", from = start, to = end)
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
## [1] "AAPL"
price_aapl <- as.data.frame(AAPL)
#Extract the dates from the xts index
price_aapl$Date <- as.Date(index(AAPL))
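The returns.csv file used in the last section is not built in this document, so its layout is an assumption; a minimal sketch of how daily close-to-close returns could be derived from the AAPL object (simple arithmetic returns on the adjusted close) is:
#Sketch: arithmetic daily returns on the adjusted close, saved for the final merge
ret_aapl <- dailyReturn(Ad(AAPL), type = "arithmetic")
returns <- data.frame(Date = index(ret_aapl), AAPL = as.numeric(ret_aapl))
write.csv(returns, file = "returns.csv", row.names = FALSE)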
#read the file into R
aapl<-read.csv('https://raw.githubusercontent.com/zahirf/Data607/master/aapl.csv', stringsAsFactors = FALSE)
class(aapl$Date)
## [1] "character"
#date is read in as character, so we convert
aapl$Date<-as.Date(aapl$Date)
#Clean up the text
aapl$text <- gsub("http.*", "", aapl$text) # remove URLs (and any text following them)
aapl$text <- gsub("&amp;", "&", aapl$text) # replace the &amp; HTML entity with &
aapl$text <- gsub("\\$aapl ", "", aapl$text, ignore.case = TRUE) # remove the $AAPL cashtag ($ must be escaped in a regex)
aapl$text <- gsub("^[[:space:]]*", "", aapl$text) # remove leading whitespace
aapl$text <- gsub("[[:space:]]*$", "", aapl$text) # remove trailing whitespace
aapl$text <- gsub(" +", " ", aapl$text) # collapse repeated spaces
aapl$text <- iconv(aapl$text, "latin1", "ASCII", sub = "") # drop non-ASCII characters (emojis etc.)
aapl$text <- gsub("\\n", "", aapl$text) # remove line breaks
aapl$text <- gsub("[[:punct:]]", "", aapl$text) # remove punctuation
glimpse(aapl$text)
## chr [1:31620] "Felt bearish for next week In AAPL GOOGL SPY puts" ...
Sentiment Analysis
#Run sentiment analysis
sentiment_aapl <- analyzeSentiment(aapl$text,
                                   language = "english",
                                   removeStopwords = TRUE, stemming = TRUE)
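analyzeSentiment() returns one row per tweet with word counts and scores from several dictionaries; SentimentGI, the General Inquirer score, is the one carried forward below. An optional sanity check, not part of the original pipeline, using the package's convertToDirection():
#Sketch: inspect the distribution of GI scores and their discrete direction
summary(sentiment_aapl$SentimentGI)
table(convertToDirection(sentiment_aapl$SentimentGI))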
Build dataframe
#Build the final dataframe for analysis: date, word count and GI sentiment per tweet
df_aapl <- data.frame(Date = aapl$Date,
                      Count = sentiment_aapl$WordCount,
                      MeanSentiment = sentiment_aapl$SentimentGI)
#Remove rows with NA values
df_aapl <- df_aapl[complete.cases(df_aapl), ]
#Frequency of sentiment words by date
count_aapl <- df_aapl %>%
  group_by(Date) %>%
  summarise(Count = sum(Count))
#Mean sentiment by date
mean_aapl <- df_aapl %>%
  group_by(Date) %>%
  summarise(Mean = mean(MeanSentiment))
Binding the data together
#Read in the daily AAPL returns and bind them, with the word counts, to the mean sentiment
ret <- read.csv("returns.csv", stringsAsFactors = FALSE)
mean_aapl <- cbind(mean_aapl, ret$AAPL, count_aapl$Count)
colnames(mean_aapl)[3:4] <- c("Returns", "Count")
write.csv(mean_aapl, file = "data_aapl.csv", row.names = FALSE, fileEncoding = "UTF-8")
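The cbind() above relies on mean_aapl, ret and count_aapl each having one row per date in the same order. If returns.csv also carries a Date column (an assumption, since its construction is not shown here), the same table could be built more defensively with explicit joins on Date:
#Sketch: merge by Date instead of by row position (assumes ret has a Date column)
data_aapl <- mean_aapl[, c("Date", "Mean")] %>%
  inner_join(count_aapl, by = "Date") %>%
  inner_join(ret %>% transmute(Date = as.Date(Date), Returns = AAPL), by = "Date")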