# Load the topic lexicon. The code below uses its `word` column and its
# `polarity` column; `polarity` holds the topic label each word belongs to.
lexicon <- read.csv("lexicon_ps.csv", stringsAsFactors = FALSE)

# Return the lexicon words whose `polarity` equals `topic`.
# `%in%` is used instead of `==` so that rows with a missing `polarity` are
# dropped rather than turning into NA entries in the word list (an NA word
# would make every later sum of counts NA).
lexicon_words <- function(topic, lex = lexicon) {
  lex$word[lex$polarity %in% topic]
}

# One word list per topic; these are read as globals by tc_count().
econ.words <- lexicon_words("economy")
imm.words <- lexicon_words("immigration")
health.words <- lexicon_words("health_care")
military.words <- lexicon_words("military")
gun.words <- lexicon_words("gun_control")
china.words <- lexicon_words("china")
trade.words <- lexicon_words("trade")
race.words <- lexicon_words("race")
climate.words <- lexicon_words("climate_change")
religion.words <- lexicon_words("religion")
# Build a cleaned tm corpus from the `text` element of a tweet table.
#
# Args:
#   filename: an object with a `text` element (e.g. a data frame of tweets);
#             despite the name it is not a file path.
# Returns:
#   A tm corpus built from a single string (all texts joined by spaces),
#   lower-cased, with punctuation and English stop words removed.
tc <- function(filename) {
  all_text <- paste(unlist(filename$text), collapse = " ")
  TweetCorpus <- Corpus(VectorSource(all_text))
  TweetCorpus <- tm_map(TweetCorpus, PlainTextDocument)
  # Lower-case FIRST and eagerly: removeWords() is case-sensitive and the
  # English stop-word list is lower case, so running it on un-lowered text
  # leaves capitalised stop words ("The", "And", ...) in the corpus.
  TweetCorpus <- tm_map(TweetCorpus, content_transformer(tolower))
  TweetCorpus <- tm_map(TweetCorpus, removePunctuation)
  TweetCorpus <- tm_map(TweetCorpus, removeWords, stopwords("english"))
  TweetCorpus <- tm_map(TweetCorpus, PlainTextDocument)
  TweetCorpus
}
# Count lexicon hits per topic in a text, save the counts to CSV and return
# them in long form.
#
# Args:
#   filename: the text to search: a character vector or a corpus such as
#             the one returned by tc(); despite the name it is not a file
#             path.
#   fname:    path of the CSV file the one-row count table is written to.
#   person:   label stored in the `name` column of the result.
# Returns:
#   The data frame produced by cnvrt_df(fn_df, person).
#
# Reads the global word lists econ.words, imm.words, ..., religion.words.
tc_count <- function(filename, fname, person) {
  # str_count() pairs `string` and `pattern` element by element, so each
  # word list must be matched against ONE string holding the whole text.
  # Collapse every element/document first; handing over a multi-element
  # object would recycle it against the word list instead of searching
  # every word in all of the text.
  pieces <- vapply(
    seq_len(length(filename)),
    function(i) paste(as.character(filename[[i]]), collapse = " "),
    character(1)
  )
  all_text <- paste(pieces, collapse = " ")

  # Total number of matches of all `words` in the text.
  count_hits <- function(words) sum(str_count(all_text, words))

  fn_df <- data.frame(
    econ = count_hits(econ.words),
    imm = count_hits(imm.words),
    health = count_hits(health.words),
    military = count_hits(military.words),
    gun = count_hits(gun.words),
    china = count_hits(china.words),
    trade = count_hits(trade.words),
    race = count_hits(race.words),
    climate = count_hits(climate.words),
    religion = count_hits(religion.words)
  )
  write.csv(fn_df, file = fname)
  cnvrt_df(fn_df, person)
}
# Reshape a one-row table of topic counts into long form for plotting.
#
# Args:
#   filename: data frame with one column per topic (the count table built
#             in tc_count()); despite the name it is not a file path.
#   nameC:    label stored in the `name` column of every row.
# Returns:
#   A data frame with one row per topic and the columns `num` (count),
#   `term` (topic name, taken from the row names), `name` (nameC) and
#   `rate` (num divided by the total of num; 0 for every topic when the
#   total is 0, instead of NaN from 0 / 0).
cnvrt_df <- function(filename, nameC) {
  # Drop an `X` column if present; presumably the row-name column that
  # appears when a table saved with write.csv() is read back.
  filename$X <- NULL
  long <- data.frame(t(filename))
  names(long)[1] <- "num"
  long$term <- rownames(long)
  long$name <- nameC
  total <- sum(long$num)
  if (isTRUE(total == 0)) {
    # No hits at all: report a rate of 0 rather than NaN.
    long$rate <- rep(0, nrow(long))
  } else {
    long$rate <- long$num / total
  }
  long
}
# Draw a bar chart of `rate` per `term`, with one dodged bar per `name`.
#
# Args:
#   data:  data frame with the columns `term`, `rate` and `name`.
#   title: plot title.
#   color: fill values handed to scale_fill_manual().
# Returns:
#   The ggplot object.
term_plots <- function(data, title, color) {
  canvas <- ggplot(data = data, aes(x = term, y = rate, fill = name))
  bars <- geom_bar(stat = "identity", position = position_dodge())
  fills <- scale_fill_manual(values = color)
  canvas + bars + fills + ggtitle(title)
}
# Work on the first 1000 rows of each tweet table. HC, BS, TC, DT, dem and
# rep are not defined in this section (presumably loaded earlier).
testHC <- head(HC, n = 1000)
testBS <- head(BS, n = 1000)
testTC <- head(TC, n = 1000)
testDT <- head(DT, n = 1000)
testdem <- head(dem, n = 1000)
testrep <- head(rep, n = 1000)

# Per-candidate topic counts; each call also writes its CSV file.
final_countHC <- tc_count(
  tc(testHC), fname = "HC_topics.csv", person = "Hillary Clinton"
)
final_countBS <- tc_count(
  tc(testBS), fname = "BS_topics.csv", person = "Bernie Sanders"
)
final_countTC <- tc_count(
  tc(testTC), fname = "TC_topics.csv", person = "Ted Cruz"
)
final_countDT <- tc_count(
  tc(testDT), fname = "DT_topics.csv", person = "Donald Trump"
)

# Per-party topic counts.
final_count_dem <- tc_count(
  tc(testdem), fname = "dem_topics.csv", person = "Democrats"
)
final_count_rep <- tc_count(
  tc(testrep), fname = "rep_topics.csv", person = "Republican"
)

# Stack the long tables that are plotted together.
final_count_dems <- rbind(final_countHC, final_countBS)
final_count_reps <- rbind(final_countTC, final_countDT)
final_count_parties <- rbind(final_count_dem, final_count_rep)
Here is an example of what the dataframes look like:
# Preview the first rows of one candidate's long-form count table.
# The `##` lines are the echoed console output.
head(final_countHC )
## num term name rate
## econ 3 econ Hillary Clinton 0.06976744
## imm 0 imm Hillary Clinton 0.00000000
## health 3 health Hillary Clinton 0.06976744
## military 4 military Hillary Clinton 0.09302326
## gun 9 gun Hillary Clinton 0.20930233
## china 0 china Hillary Clinton 0.00000000
# Preview the stacked Democratic table; its first rows are Hillary Clinton's
# because final_countHC is the first argument to rbind().
head(final_count_dems)
## num term name rate
## econ 3 econ Hillary Clinton 0.06976744
## imm 0 imm Hillary Clinton 0.00000000
## health 3 health Hillary Clinton 0.06976744
## military 4 military Hillary Clinton 0.09302326
## gun 9 gun Hillary Clinton 0.20930233
## china 0 china Hillary Clinton 0.00000000
First, define some colors:
# Fill colours handed to term_plots(), two per chart.

# Democratic candidates: light blue, dark blue
color1 <- c("#99CCFF", "#003399")

# Republican candidates: two shades of light red
color2 <- c("#FF9999", "#FF6666")

# Party comparison: blue, red
color3 <- c("#6699FF", "#FF6666")
And now for some plots:
# One chart per comparison: Democratic candidates, Republican candidates,
# and the two parties.
term_plots(
  data = final_count_dems,
  title = "Rate of Topics per Democratic Candidate",
  color = color1
)
term_plots(
  data = final_count_reps,
  title = "Rate of Topics per Republican Candidate",
  color = color2
)
term_plots(
  data = final_count_parties,
  title = "Rate of Topics per Political Party",
  color = color3
)