#install.packages("twitteR")
library(twitteR)
#Example from http://www.rdatamining.com/examples/text-mining
#https://dev.twitter.com/
#http://geoffjentry.hexdump.org/twitteR.pdf
#https://twitter.com/apps/new
#>setup_twitter_oauth("APIkey","APIsecret","Accesstoken","Accesssecret “)
setup_twitter_oauth("PETxc9u6KqENPByL805wlFO1M","ptysLxcJo6ZSBdl6CIM1Nhdv7w16oSnsRe0PI15jwvieyG9biw","3232641518-5AERq2G5xQHWKVsQWy0pKNZYCixFQH6d77CSRng","Tkt4UHBMXwZqtlTHCx8gw36h2VKU1RGAZrhiA4uCmWxgA")
## [1] "Using direct authentication"
rdmTweets <- userTimeline("cia", n=500)  # pull up to 500 recent tweets from the @CIA timeline
rdmTweets[1:3]
## [[1]]
## [1] "CIA: CIA #Museum Artifact of the Week: Afghan Hat\nA gift from Afghan President Karzai to former DCI George TenetÂ… https://t.co/zUAZ9OqMQB"
##
## [[2]]
## [1] "CIA: ICYMI:\nNew Anthology: \nCIA & the Wars in Southeast Asia, 1947-75\n\n41 #unclassified articles & more!Â… https://t.co/ukmH3tTIoY"
##
## [[3]]
## [1] "CIA: ICYMI:\nNew #Unclassified \"Studies in Intel\":\n-Intel for Warfighter\n-Why Bad Things Happen to Good Analysts\n-& more!Â… https://t.co/pNPryg92u1"
df <- do.call("rbind", lapply(rdmTweets, as.data.frame))  # flatten the list of status objects into a data frame
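# twitteR also provides twListToDF(), which performs the same conversion in one call;
# an equivalent alternative:
#df <- twListToDF(rdmTweets)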
library(tm)
## Loading required package: NLP
Corpus1 <- Corpus(VectorSource(df$text))  # build a corpus from the tweet text
Corpus1 <- tm_map(Corpus1, removePunctuation)
Corpus1 <- tm_map(Corpus1, removeNumbers)
Corpus1 <- tm_map(Corpus1, content_transformer(tolower))  # wrap base functions in content_transformer() for tm >= 0.6
Corpus1 <- tm_map(Corpus1, removeWords, stopwords("english"))
Corpus1 <- tm_map(Corpus1, stemDocument)  # stemming requires the SnowballC package to be installed
Corpus1 <- tm_map(Corpus1, stripWhitespace)
#Corpus1 <- tm_map(Corpus1, PlainTextDocument)  # old workaround for tolower(); not needed with content_transformer()
dtm <- DocumentTermMatrix(Corpus1)  # tweets as rows, terms as columns
tdm <- TermDocumentMatrix(Corpus1)  # terms as rows, tweets as columns
matx1 <- as.matrix(tdm)
sort1 <- sort(rowSums(matx1), decreasing=TRUE)  # total frequency of each term across all tweets
di <- data.frame(Word=names(sort1), Frequency=sort1)
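# Quick sanity check of the most frequent terms before plotting (uses the di data frame built above):
head(di, 10)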
#install.packages("devtools")
library(devtools)
#install_github("okugami79/sentiment140")
library(sentiment)
## Loading required package: RCurl
## Loading required package: bitops
## Loading required package: rjson
## Loading required package: plyr
##
## Attaching package: 'plyr'
## The following object is masked from 'package:twitteR':
##
## id
a <- sentiment(di$Word)  # classify the polarity of each word
table(a$polarity)
##
## negative neutral
## 2 1005
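# Which words were scored negative? A small check, assuming sentiment() returns one row
# per input word in input order:
di$Polarity <- a$polarity
subset(di, Polarity == "negative")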
library(wordcloud)
## Loading required package: RColorBrewer
wordcloud(di$Word, di$Frequency, max.words=100, colors=brewer.pal(6, "Set1"))  # cloud of the 100 most frequent terms
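# A plain bar chart of the top terms is sometimes easier to read than a word cloud:
barplot(sort1[1:10], las=2, main="Top 10 terms")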

findFreqTerms(dtm, lowfreq=10)
## [1] "amp" "artifact" "cia" "icymi" "inmemoriam"
## [6] "intel" "intelcon" "museum" "oss" "pdb"
## [11] "week"
findAssocs(dtm, 'cia', 0.30)
## $cia
## artifact museum week
## 0.46 0.46 0.41
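# The same call works for any other frequent term found above, e.g.:
findAssocs(dtm, 'museum', 0.30)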
#dtms <- removeSparseTerms(dtm, 0.15) # optionally drop sparse terms (keep only terms with at most 15% zero entries, i.e. appearing in at least 85% of tweets)
library(cluster)
d <- dist(t(as.matrix(dtm)), method="euclidean") # distance between terms (terms become rows after transposing)
fit <- hclust(d=d, method="ward.D") # "ward" was renamed to "ward.D" in recent R; "ward.D2" is also available
plot.new()
plot(fit, hang=-1)
groups <- cutree(fit, k=5) # "k=" defines the number of clusters you are using
rect.hclust(fit, k=5, border="red") # highlight the 5 clusters on the dendrogram with red borders
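# Inspect which terms fell into each of the 5 clusters:
table(groups)
split(names(groups), groups)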
### K-means clustering
library(fpc)

library(cluster)
d <- dist(t(as.matrix(dtm)), method="euclidean")
kfit <- kmeans(d, 2) # k-means with 2 clusters on the distance matrix
clusplot(as.matrix(d), kfit$cluster, color=TRUE, shade=TRUE, labels=2, lines=0)
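# A rough heuristic for choosing k (not part of the original analysis): compare the total
# within-cluster sum of squares across a range of k values and look for an "elbow".
set.seed(123)  # arbitrary seed, only for reproducibility of the sketch
wss <- sapply(2:10, function(k) kmeans(d, k)$tot.withinss)
plot(2:10, wss, type="b", xlab="Number of clusters k", ylab="Total within-cluster SS")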
