For this assignment, I am tasked with getting an example from Text Mining with R running and then extending the example to a new corpus and a new sentiment lexicon. Sections 1-6 are taken directly from Text Mining with R [1]. I attempted to use a Mendeley package installed from GitHub and encountered numerous errors.
remotes::install_github("zeehio/mendeleyr")
## Skipping install of 'mendeleyr' from a github remote, the SHA1 (079707cf) has not changed since last install.
## Use `force = TRUE` to force installation
library(ggplot2)
library(remotes)
## Warning: package 'remotes' was built under R version 3.6.3
library(stringr)
library(plyr)
library(twitteR)
## Warning: package 'twitteR' was built under R version 3.6.3
##
## Attaching package: 'twitteR'
## The following object is masked from 'package:plyr':
##
## id
# Mendeley only works with older versions of R, so using this package impacted my entire code.
library(mendeleyr)
require(rplos)
## Loading required package: rplos
## Warning: package 'rplos' was built under R version 3.6.3
##Get number of papers with our terms
# Authenticate BEFORE querying: searchTwitter() stops with "OAuth has not been
# registered for this session" unless setup_twitter_oauth() has already run.
# "API key" / "API secret" are placeholders and must be replaced with real
# credentials. I was unable to apply for a Twitter developer account, so in the
# recorded run authentication failed with "Unauthorized (HTTP 401)" and the two
# statements that follow it produced no data.
setup_twitter_oauth("API key", "API secret")
# Request up to 1500 tweets matching "Mendeley".
tweets <- searchTwitter("Mendeley", n = 1500)
# Turn every returned status object into a data frame and stack the rows.
tweets_df <- ldply(tweets, function(t) t$toDataFrame())
# Load the "sure" and "unsure" lexicons (whitespace-separated terms; anything
# after ';' on a line is treated as a comment by scan()).
# In the recorded run both files were absent from the working directory, so
# scan() failed with "cannot open the connection" and every later statement
# that needs `surewords` / `unsurewords` failed in turn. Check up front and
# report which file is missing and where it was looked for.
lexicon_files <- c(sure = "sure-words.txt", unsure = "unsure-words.txt")
missing_lexicons <- lexicon_files[!file.exists(lexicon_files)]
if (length(missing_lexicons) > 0) {
  stop(
    "Lexicon file(s) not found in ", getwd(), ": ",
    paste(missing_lexicons, collapse = ", "),
    call. = FALSE
  )
}
surewords <- scan(lexicon_files[["sure"]], what = "character", comment.char = ";")
unsurewords <- scan(lexicon_files[["unsure"]], what = "character", comment.char = ";")
# Additional "sure" terms go here. The former placeholder '[add words here' was
# appended as a real term; it is also an invalid regular expression (unclosed
# bracket), so it would have broken the grep() calls further down.
extra_surewords <- character(0)
surewords <- c(surewords, extra_surewords)
# Look up each lexicon in PLOS via rplos::plosword(), then keep the `table`
# element of each result for plotting later.
# NOTE(review): presumably `table` holds one row per term with an article
# count (the plots use columns Term / No_Articles) -- confirm against rplos.
surecount <- plosword(surewords, vis = 'TRUE')
## Error in plosword(surewords, vis = "TRUE"): object 'surewords' not found
unsurecount <- plosword(unsurewords, vis = 'TRUE')
## Error in plosword(unsurewords, vis = "TRUE"): object 'unsurewords' not found
surecount_df <- surecount[["table"]]
## Error in eval(expr, envir, enclos): object 'surecount' not found
unsurecount_df <- unsurecount[["table"]]
## Error in eval(expr, envir, enclos): object 'unsurecount' not found
# Search PLOS for one term and return the matching article records.
#
# Args:
#   surewords: the search query, passed to rplos::searchplos() as `q`. When
#              used with llply() this is a single lexicon term.
#   limit:     maximum number of records to request (default 100, the value
#              that used to be hard-coded).
#
# Returns:
#   Whatever searchplos() returns for the query.
#
# Arguments are passed by name: the third positional argument of the current
# searchplos() is `fq` (filter query), not `limit`. The field list is built
# without the stray trailing comma and embedded spaces of the old literal.
geteverything <- function(surewords, limit = 100) {
  fields <- c(
    "id", "title", "subject", "pagecount", "publication_date",
    "author", "article_type", "body"
  )
  searchplos(q = surewords, fl = paste(fields, collapse = ","), limit = limit)
}
# Coerce one search result (a list) into a data frame.
#
# Args:
#   sureword_everything_list: list-like object accepted by data.frame().
#
# Returns:
#   The resulting data frame, invisibly.
makedf_everything <- function(sureword_everything_list) {
  result_df <- data.frame(sureword_everything_list)
  invisible(result_df)
}
# Count the article bodies that mention at least one lexicon term.
#
# Args:
#   sureword_everything_df: data frame with a `body` column of article text.
#   terms: character vector of regular expressions to look for. Defaults to
#          the script-level `surewords` lexicon, which the function previously
#          read implicitly.
#
# Returns:
#   Integer: number of rows whose `body` matches any element of `terms`.
#
# The earlier version handed the whole lexicon to grep() as the pattern;
# grep() only uses the first element of a multi-element pattern, so every
# term but the first was silently ignored.
countterms <- function(sureword_everything_df, terms = surewords) {
  bodies <- sureword_everything_df$body
  matched <- rep(FALSE, length(bodies))
  for (term in terms) {
    matched <- matched | grepl(term, bodies)
  }
  sum(matched)
}
# Run one PLOS search per lexicon term, then flatten each result into a data
# frame and stack them. The flattening step must call makedf_everything():
# the name used before, `makedf`, is not defined anywhere in this script.
# In the recorded run neither statement executed because `surewords` was never
# created (the lexicon files were missing).
sureword_everything_list <- llply(surewords, geteverything, .progress = 'text')
sureword_everything_df <- ldply(sureword_everything_list, makedf_everything, .progress = 'text')
Remove unwanted data: tweets whose source is Mendeley itself, and the first column of the article data frame.
# Keep only tweets whose statusSource is not the Mendeley link. The HTML
# snippet contains double quotes, so it is wrapped in single quotes; inside a
# double-quoted string the unescaped inner quotes end the literal early and
# the line does not parse.
source_filtered_df <- subset(
  tweets_df,
  tweets_df$statusSource != '<a href="http://www.mendeley.com" rel="nofollow">Mendeley</a>'
)
## Error in subset(tweets_df, tweets_df$statusSource != "<a href="http://www.mendeley.com" rel="nofollow">Mendeley</a>"): object 'tweets_df' not found
# Drop the first column of the stacked results.
sureword_everything_df[, 1] <- NULL
## Error in sureword_everything_df[, 1] <- NULL: object 'sureword_everything_df' not found
# Truncate pagecount, id and publication_date to fixed display widths
# (3, 29 and 10 respectively).
sureword_everything_df[["pagecount"]] <- strtrim(sureword_everything_df[["pagecount"]], 3)
## Error in strtrim(sureword_everything_df$pagecount, 3): object 'sureword_everything_df' not found
sureword_everything_df[["id"]] <- strtrim(sureword_everything_df[["id"]], 29)
## Error in strtrim(sureword_everything_df$id, 29): object 'sureword_everything_df' not found
sureword_everything_df[["publication_date"]] <- strtrim(sureword_everything_df[["publication_date"]], 10)
## Error in strtrim(sureword_everything_df$publication_date, 10): object 'sureword_everything_df' not found
Convert the numeric fields to numbers and the date fields to dates.
# Parse the "YYYY-MM-DD" text as a Date. strptime() returns POSIXlt, a
# list-based class that is a poor fit for a data-frame column; as.Date() with
# the same format yields a plain Date vector.
sureword_everything_df$publication_date <- as.Date(
  sureword_everything_df$publication_date,
  format = "%Y-%m-%d"
)
# pagecount is character after the strtrim() step earlier in this script, so
# convert it to numeric.
sureword_everything_df$pagecount <- as.numeric(sureword_everything_df$pagecount)
# In the recorded run both conversions failed with
# "object 'sureword_everything_df' not found".
# Normalise the free-text columns: strip control characters, then lower-case.
# One loop replaces ten copy-pasted statements. Only columns that are actually
# present are touched: geteverything() requests `body` but none of the four
# section columns, and assigning the empty result of gsub() on a missing
# column to a non-empty data frame is an error.
# In the recorded run every one of these statements failed with
# "object 'sureword_everything_df' not found".
text_columns <- c(
  "body", "figure_table_caption", "materials_and_methods",
  "results_and_discussion", "introduction"
)
for (text_column in intersect(text_columns, names(sureword_everything_df))) {
  without_cntrl <- gsub('[[:cntrl:]]', '', sureword_everything_df[[text_column]])
  sureword_everything_df[[text_column]] <- tolower(without_cntrl)
}
# Build an article-by-term indicator table for the "sure" lexicon: cell [j, i]
# is 1 when term i (used as a regular expression) matches the body of
# article j, else 0.
# The matrix is allocated once and filled one column at a time with the
# vectorised grepl(), instead of growing a data frame cell by cell, and
# seq_along() keeps the loop from running when the lexicon is empty
# (1:length(x) would iterate over c(1, 0)).
sure_bodies <- sureword_everything_df$body
sure_hits <- matrix(0L, nrow = length(sure_bodies), ncol = length(surewords))
for (i in seq_along(surewords)) {
  sure_hits[, i] <- grepl(surewords[i], sure_bodies)
}
surewordcount <- as.data.frame(sure_hits)
## Error in eval(expr, envir, enclos): object 'sureword_everything_df' not found
# Label each column with its term. The table has one column per lexicon entry,
# so name all of them instead of assuming the lexicon has exactly 32 terms.
colnames(surewordcount) <- surewords
## Error in eval(expr, envir, enclos): object 'surewords' not found
# Build the article-by-term indicator table for the "unsure" lexicon: cell
# [j, i] is 1 when term i (used as a regular expression) matches the body of
# article j, else 0.
# Preallocated and filled column-wise with the vectorised grepl();
# seq_along() keeps the loop from running when the lexicon is empty.
unsure_bodies <- sureword_everything_df$body
unsure_hits <- matrix(0L, nrow = length(unsure_bodies), ncol = length(unsurewords))
for (i in seq_along(unsurewords)) {
  unsure_hits[, i] <- grepl(unsurewords[i], unsure_bodies)
}
unsurewordcount <- as.data.frame(unsure_hits)
## Error in eval(expr, envir, enclos): object 'sureword_everything_df' not found
# Label each column with its term. The table has one column per lexicon entry,
# so name all of them instead of assuming the lexicon has exactly 32 terms.
colnames(unsurewordcount) <- unsurewords
## Error in eval(expr, envir, enclos): object 'unsurewords' not found
# Net sureness per article: "sure" matches minus "unsure" matches.
# NOTE(review): the element-wise subtraction requires both tables to have the
# same dimensions, i.e. lexicons of equal length -- confirm.
wordcount <- surewordcount - unsurewordcount
# rowSums() replaces the per-row loop: it is vectorised and yields an empty
# result for zero rows, where 1:length(x) would have iterated over c(1, 0).
# as.integer() keeps the integer type produced by the old sum() and drops the
# row-name labels.
sureword_everything_df$sureness <- as.integer(rowSums(wordcount))
## Error in eval(expr, envir, enclos): object 'sureword_everything_df' not found
# Filter out neutral sentiment.
# NOTE(review): this first filter keeps rows with sureness > 1 or < -1, but its
# result is overwritten by the `!= 0` filter two statements below, so only the
# second filter takes effect. Confirm which threshold is intended.
strong_sentiment<-sureword_everything_df[sureword_everything_df$sureness > 1 | sureword_everything_df$sureness < (-1),]
## Error in eval(expr, envir, enclos): object 'sureword_everything_df' not found
# Keep every row whose sureness is non-zero (replaces the result above).
strong_sentiment<-sureword_everything_df[sureword_everything_df$sureness != 0,]
## Error in eval(expr, envir, enclos): object 'sureword_everything_df' not found
# Drop factor levels that no longer occur after the row filter.
strong_sentiment<-droplevels(strong_sentiment)
## Error in droplevels(strong_sentiment): object 'strong_sentiment' not found
## Error in droplevels(strong_sentiment): object 'strong_sentiment' not found
# Replace "/" in each article id with "%252F" (the double-URL-encoded form).
# NOTE(review): presumably the ids are DOIs embedded in a request URL by
# details() -- confirm.
dois <- gsub('/', '%252F', strong_sentiment$id)
strong_sentiment$readers <- NA
# The loop below follows this error-tolerant template. The template is
# pseudo-code and is kept as a comment: left as bare code, `for {` is a parse
# error ("unexpected '{'") that stopped the whole chunk in the recorded run.
#   for {
#     if (class(try(...,silent=T))=="try-error") result[[i]] <- NA
#     ...
#   }
#
# Look up the reader count for every DOI, pausing one second after each 50
# requests. The pause is a plain `if`: with the former `if ... else if`, every
# 50th DOI was skipped and its reader count left as NA.
# NOTE(review): details() is not defined in the visible part of this script;
# it is assumed to come from a Mendeley client package -- confirm.
for (i in seq_along(dois)) {
  if (i %% 50 == 0) {
    Sys.sleep(1)
  }
  reader_count <- tryCatch(
    details(dois[i], type = "doi")$stats$readers,
    error = function(e) NULL
  )
  # A failed lookup or a response without a single reader count is stored
  # as NA.
  if (length(reader_count) != 1L) {
    reader_count <- NA
    print("NA")
  }
  strong_sentiment$readers[i] <- reader_count
}
# Score each filtered tweet against positive / negative word lists.
# NOTE(review): score.sentiment() is not defined in the visible part of this
# script and no attached package provides it (see the error below); `poswords`
# and `negwords` are not created anywhere in view either. These must be
# defined before this section can run.
sentiment_score = score.sentiment(source_filtered_df$text, poswords, negwords, .progress = 'text')
## Error in score.sentiment(source_filtered_df$text, poswords, negwords, : could not find function "score.sentiment"
# Attach the scores to the tweets by matching on the tweet text.
paper_sentiment<-merge(source_filtered_df, sentiment_score, by = "text")
## Error in merge(source_filtered_df, sentiment_score, by = "text"): object 'source_filtered_df' not found
# Keep clearly positive (score >= 2) and clearly negative (score <= -2) tweets.
pos_sentiment<-subset(paper_sentiment, paper_sentiment$score >=2)
## Error in subset(paper_sentiment, paper_sentiment$score >= 2): object 'paper_sentiment' not found
neg_sentiment<-subset(paper_sentiment, paper_sentiment$score <=-2)
## Error in subset(paper_sentiment, paper_sentiment$score <= -2): object 'paper_sentiment' not found
# NOTE(review): this overwrites the article-level `strong_sentiment` built
# earlier (the one carrying `sureness` and `readers`) with tweet rows, yet the
# box plot further down reads `sureness` and `readers` from `strong_sentiment`.
# Confirm whether a different variable name was intended.
strong_sentiment<-rbind(droplevels(pos_sentiment), droplevels(neg_sentiment))
## Error in droplevels(pos_sentiment): object 'pos_sentiment' not found
# Horizontal bar charts of article counts per term, ordered by count.
# The bar heights are already computed (y = No_Articles), so the bars need
# stat = "identity": geom_bar()'s default stat counts rows and rejects a
# y aesthetic.
surebarplot <- ggplot(surecount_df, aes(x = reorder(Term, No_Articles), y = No_Articles)) +
  geom_bar(stat = "identity") +
  coord_flip()
## Error in ggplot(surecount_df, aes(x = reorder(Term, No_Articles), y = No_Articles)): object 'surecount_df' not found
print(surebarplot)
## Error in print(surebarplot): object 'surebarplot' not found
unsurebarplot <- ggplot(unsurecount_df, aes(x = reorder(Term, No_Articles), y = No_Articles)) +
  geom_bar(stat = "identity") +
  coord_flip()
## Error in ggplot(unsurecount_df, aes(x = reorder(Term, No_Articles), y = No_Articles)): object 'unsurecount_df' not found
print(unsurebarplot)
## Error in print(unsurebarplot): object 'unsurebarplot' not found
# Box plot of reader counts for each sureness score. The score is wrapped in
# factor() so that every distinct score gets its own box; with a continuous x
# and no grouping, all rows collapse into a single box.
# In the recorded run this failed with "object 'strong_sentiment' not found".
surenessvsreadership_plot <- ggplot(strong_sentiment, aes(x = factor(sureness), y = readers)) +
  stat_boxplot(position = "dodge")
print(surenessvsreadership_plot)
## Error in print(surenessvsreadership_plot): object 'surenessvsreadership_plot' not found
# Copy of strong_sentiment without columns 3 through 8 (selected by position).
# NOTE(review): judging by the variable name these positions include the
# article body -- confirm against the actual column order.
strong_sentiment_no_body <- strong_sentiment[, -(3:8)]
## Error in eval(expr, envir, enclos): object 'strong_sentiment' not found