Refactoring Results

Functions used in this project

# Load the topic lexicon; its `polarity` column holds the topic label that each
# entry of the `word` column belongs to.
lexicon <- read.csv("lexicon_ps.csv", stringsAsFactors = FALSE)

# One vector of lexicon words per topic. `%in%` is used instead of `==` so that
# rows with a missing polarity are skipped rather than turned into NA entries,
# which would make every count based on that vector NA.
econ.words <- lexicon$word[lexicon$polarity %in% "economy"]
imm.words <- lexicon$word[lexicon$polarity %in% "immigration"]
health.words <- lexicon$word[lexicon$polarity %in% "health_care"]
military.words <- lexicon$word[lexicon$polarity %in% "military"]
gun.words <- lexicon$word[lexicon$polarity %in% "gun_control"]
china.words <- lexicon$word[lexicon$polarity %in% "china"]
trade.words <- lexicon$word[lexicon$polarity %in% "trade"]
race.words <- lexicon$word[lexicon$polarity %in% "race"]
climate.words <- lexicon$word[lexicon$polarity %in% "climate_change"]
religion.words <- lexicon$word[lexicon$polarity %in% "religion"]

# Build a cleaned single-document tm corpus from the `text` element of a data
# set.
#
# Args:
#   filename: an object with a `text` element; despite the name it is not a
#             file path (the callers pass subsets of the tweet data sets).
# Returns:
#   The corpus after the tm_map() steps below have been applied.
tc <- function(filename){
  # Paste every text into one long string, so the corpus holds one document.
  TweetCorpus <- paste(unlist(filename$text), collapse =" ") 
  TweetCorpus <- Corpus(VectorSource(TweetCorpus))
  TweetCorpus <- tm_map(TweetCorpus, PlainTextDocument)
  TweetCorpus <- tm_map(TweetCorpus, removePunctuation)
  # NOTE(review): stop words are removed before the text is lower-cased. If
  # removeWords() matches case-sensitively, capitalised stop words ("The",
  # "And") survive this step - confirm against the tm version in use.
  TweetCorpus <- tm_map(TweetCorpus, removeWords, stopwords('english'))
  # NOTE(review): lazy=TRUE presumably defers the lower-casing until the
  # content is accessed - verify that it is actually applied before counting.
  TweetCorpus <- tm_map(TweetCorpus, content_transformer(tolower),lazy=TRUE)
  TweetCorpus <- tm_map(TweetCorpus, PlainTextDocument)
  return(TweetCorpus)
}

# Count lexicon matches per topic in a cleaned text, save the counts as CSV and
# return them in long format.
#
# Args:
#   filename: the text to search, as returned by tc(); not a file path.
#   fname:    path of the CSV file the one-row table of counts is written to.
#   person:   label stored in the `name` column of the result.
# Returns:
#   The data frame produced by cnvrt_df(): one row per topic.
#
# Reads the global *.words vectors that are derived from the lexicon.
tc_count <- function(filename, fname, person) {
  # Column name of the output -> lexicon words of that topic.
  topic_lexicons <- list(
    econ = econ.words,
    imm = imm.words,
    health = health.words,
    military = military.words,
    gun = gun.words,
    china = china.words,
    trade = trade.words,
    race = race.words,
    climate = climate.words,
    religion = religion.words
  )

  # Total number of matches over all words of each topic.
  topic_totals <- lapply(topic_lexicons, function(topic_words) {
    sum(str_count(filename, topic_words))
  })

  # One row, one column per topic.
  fn_df <- as.data.frame(topic_totals)
  write.csv(fn_df, file = fname)
  cnvrt_df(fn_df, person)
}

# Reshape a one-row table of topic counts into long format.
#
# Args:
#   filename: data frame with one column per topic holding its count; despite
#             the name it is not a file path.
#   nameC:    label written to the `name` column (e.g. a candidate's name).
# Returns:
#   A data frame with one row per topic and the columns
#     num  - the count of the topic,
#     term - the topic name (taken from the input's column names),
#     name - `nameC`,
#     rate - the topic's share of all counts; 0 when every count is 0.
cnvrt_df <- function(filename, nameC) {
  # Drop an `X` column if present - presumably the row-name column that
  # read.csv() creates when a file written by write.csv() is read back.
  filename$X <- NULL

  # Transposing turns the topic columns into rows (row names = topic names).
  long <- data.frame(t(filename))
  names(long)[1] <- "num"
  long$term <- rownames(long)
  long$name <- nameC

  # Guard against 0 / 0 = NaN when no lexicon word was found at all.
  total <- sum(long$num)
  long$rate <- if (isTRUE(total == 0)) {
    rep(0, nrow(long))
  } else {
    long$num / total
  }
  long
}

# Dodged bar chart of topic rates, one bar per `name` within each topic.
#
# Args:
#   data:  data frame with the columns `term`, `rate` and `name`.
#   title: plot title.
#   color: fill colours handed to scale_fill_manual().
# Returns:
#   The ggplot object.
term_plots <- function(data, title, color) {
  bar_mapping <- aes(x = term, y = rate, fill = name)
  bars <- geom_bar(stat = "identity", position = position_dodge())

  chart <- ggplot(data = data, mapping = bar_mapping)
  chart <- chart + bars
  chart <- chart + scale_fill_manual(values = color)
  chart + ggtitle(title)
}

Below, the functions are called on the first 1,000 rows of each data set

# Work on the first 1000 rows of every data set.
testHC <- head(HC, n = 1000)
testBS <- head(BS, n = 1000)
testTC <- head(TC, n = 1000)
testDT <- head(DT, n = 1000)
testdem <- head(dem, n = 1000)
testrep <- head(rep, n = 1000)

# Topic counts per candidate; every call also writes the counts to a CSV file.
final_countHC <- tc_count(
  filename = tc(testHC),
  fname = "HC_topics.csv",
  person = "Hillary Clinton"
)
final_countBS <- tc_count(
  filename = tc(testBS),
  fname = "BS_topics.csv",
  person = "Bernie Sanders"
)
final_countTC <- tc_count(
  filename = tc(testTC),
  fname = "TC_topics.csv",
  person = "Ted Cruz"
)
final_countDT <- tc_count(
  filename = tc(testDT),
  fname = "DT_topics.csv",
  person = "Donald Trump"
)

# Topic counts per party.
final_count_dem <- tc_count(
  filename = tc(testdem),
  fname = "dem_topics.csv",
  person = "Democrats"
)
final_count_rep <- tc_count(
  filename = tc(testrep),
  fname = "rep_topics.csv",
  person = "Republican"
)

# Stack the long tables that are plotted together.
final_count_dems <- rbind(final_countHC, final_countBS)
final_count_reps <- rbind(final_countTC, final_countDT)
final_count_parties <- rbind(final_count_dem, final_count_rep)

Here is an example of what the dataframes look like:

# First six rows of one candidate's long table: one row per topic.
head(final_countHC )
##          num     term            name       rate
## econ       3     econ Hillary Clinton 0.06976744
## imm        0      imm Hillary Clinton 0.00000000
## health     3   health Hillary Clinton 0.06976744
## military   4 military Hillary Clinton 0.09302326
## gun        9      gun Hillary Clinton 0.20930233
## china      0    china Hillary Clinton 0.00000000
# The combined table starts with the Hillary Clinton rows (it is built with
# rbind(final_countHC, final_countBS)), so its first six rows are the same as
# above; the Bernie Sanders rows come after them.
head(final_count_dems)
##          num     term            name       rate
## econ       3     econ Hillary Clinton 0.06976744
## imm        0      imm Hillary Clinton 0.00000000
## health     3   health Hillary Clinton 0.06976744
## military   4 military Hillary Clinton 0.09302326
## gun        9      gun Hillary Clinton 0.20930233
## china      0    china Hillary Clinton 0.00000000

First, define some colors:

# Fill colours for the plot of the Democratic candidates (light and dark blue);
# each vector holds one colour per value of `name` in the plotted data.
color1 <- c("#99CCFF", "#003399")
# Fill colours for the plot of the Republican candidates (two shades of red).
color2 <- c("#FF9999", "#FF6666")
# Fill colours for the plot comparing both parties (blue and red).
color3 <- c("#6699FF", "#FF6666")

And now for some plots:

# Democratic candidates
term_plots(
  data = final_count_dems,
  title = "Rate of Topics per Democratic Candidate",
  color = color1
)

# Republican candidates
term_plots(
  data = final_count_reps,
  title = "Rate of Topics per Republican Candidate",
  color = color2
)

# Party against party
term_plots(
  data = final_count_parties,
  title = "Rate of Topics per Political Party",
  color = color3
)