Data Preparation

The data come from the American National Election Studies (ANES). I uploaded the file to my GitHub: https://raw.githubusercontent.com/omerozeren/DATA606/master/Project/anes_timeseries_2016_rawdata.txt

I created a sample data set with selected columns that I believe have an effect on presidential elections. The selected survey questions cover the respondent’s gender, whether the respondent voted in the previous presidential election, health insurance status, economic review (assessment of the economy), high school graduation status, marital status, whether the respondent favors building a wall, whether the respondent favors allowing refugees to come to the USA, belief in global warming, how often the respondent goes to church, how frequently the respondent follows the media, and temperature (feeling rating) toward the Democratic Party.

I also uploaded the user guide and codebook, which describes each variable’s characteristics, to my GitHub: https://github.com/omerozeren/DATA606/blob/master/Project/anes_timeseries_2016_userguidecodebook.pdf .

# Load the pipe-delimited ANES 2016 raw file, reading every column as
# character so the survey codes (e.g. " 1", "-9") are kept exactly as written.
url <- "https://raw.githubusercontent.com/omerozeren/DATA606/master/Project/anes_timeseries_2016_rawdata.txt"
rawData <- read_delim(
  url,
  delim = "|",
  # A character default applies to however many columns the file has, rather
  # than relying on a hard-coded 1290-character type string.
  col_types = cols(.default = col_character())
)
# Start the analysis data frame with the ID column (V160001); the recoded
# survey variables are added to it one at a time below.
sample <- data.frame(ID = as.character(rawData$V160001))

# Sex: codes 1/2 become labels, code -1 is dropped (turned into NA).
sample[["sex"]] <- fct_recode(
  rawData[["V161002"]],
  "Male" = " 1",
  "Female" = " 2",
  NULL = "-1"
)

# Voted in the previous presidential election: codes -8 and -9 become NA.
sample[["votePre"]] <- fct_recode(
  rawData[["V161005"]],
  "Yes" = " 1",
  "No" = " 2",
  NULL = "-8",
  NULL = "-9"
)

# Health insurance: same Yes/No coding, with -8 and -9 turned into NA.
sample[["health_ins"]] <- fct_recode(
  rawData[["V161112"]],
  "Yes" = " 1",
  "No" = " 2",
  NULL = "-8",
  NULL = "-9"
)
                              
# Temperature toward the Democratic Party (V162078). The column is read as
# character, so convert it to numeric -- as is done for the candidate ratings
# -- to make it usable as a quantitative variable.
sample$heat_to_democ <- local({
  rating <- as.numeric(rawData$V162078)
  # Negative values are missing-data codes, handled the same way as for the
  # candidate ratings.
  # NOTE(review): values above 100 are also blanked on the assumption that this
  # is a 0-100 scale with large sentinel codes -- confirm against the codebook.
  rating[!is.na(rating) & (rating < 0 | rating > 100)] <- NA
  rating
})

# Economic review: five-point scale; "Don't know" and "Refused" are kept as
# their own levels rather than being turned into NA.
sample[["econ_review"]] <- fct_recode(
  rawData[["V161139"]],
  "Very good" = " 1",
  "Good" = " 2",
  "Neither good nor bad" = " 3",
  "Bad" = " 4",
  "Very bad" = " 5",
  "Don't know" = "-8",
  "Refused" = "-9"
)

# High school graduation status; "Inapplicable" and "Refused" stay as levels.
sample[["high_school"]] <- fct_recode(
  rawData[["V161271"]],
  "Yes" = " 1",
  "No" = " 2",
  "Inapplicable" = "-1",
  "Refused" = "-9"
)

# Marriage status: codes 1 and 2 are both labelled "Married", which merges
# them into a single level.
sample[["marriage"]] <- fct_recode(
  rawData[["V161268"]],
  "Married" = " 1",
  "Married" = " 2",
  "Widowed" = " 3",
  "Divorced" = " 4",
  "Separated" = " 5",
  "Never married" = " 6",
  "Refused" = "-9"
)
                               


# Building a wall: seven-point favor/oppose scale, with "Don't know" and
# "Refused" kept as levels.
sample[["built_wall"]] <- fct_recode(
  rawData[["V161196x"]],
  "Favor a great deal" = " 1",
  "Favor a moderate amount" = " 2",
  "Favor a little" = " 3",
  "Neither favor nor oppose" = " 4",
  "Oppose a little" = " 5",
  "Oppose a moderate amount" = " 6",
  "Oppose a great deal" = " 7",
  "Don't know" = "-8",
  "Refused" = "-9"
)

# Allowing refugees: same seven-point favor/oppose labelling as built_wall.
sample[["allow_refugees"]] <- fct_recode(
  rawData[["V161214x"]],
  "Favor a great deal" = " 1",
  "Favor a moderate amount" = " 2",
  "Favor a little" = " 3",
  "Neither favor nor oppose" = " 4",
  "Oppose a little" = " 5",
  "Oppose a moderate amount" = " 6",
  "Oppose a great deal" = " 7",
  "Don't know" = "-8",
  "Refused" = "-9"
)

# Belief in global warming: Yes/No, with "Don't know" and "Refused" kept as
# levels.
sample[["global_warming"]] <- fct_recode(
  rawData[["V161221"]],
  "Yes" = " 1",
  "No" = " 2",
  "Don't know" = "-8",
  "Refused" = "-9"
)

# Church attendance frequency; "Inapplicable" and "Refused" stay as levels.
sample[["church"]] <- fct_recode(
  rawData[["V161245"]],
  "Every week" = " 1",
  "Almost every week" = " 2",
  "Once or twice a month" = " 3",
  "A few times a year" = " 4",
  "Never" = " 5",
  "Inapplicable" = "-1",
  "Refused" = "-9"
)

# Media attention: five-point scale; "Inapplicable" and "Refused" stay as
# levels.
sample[["media"]] <- fct_recode(
  rawData[["V161009"]],
  "A great deal" = " 1",
  "A lot" = " 2",
  "A moderate amount" = " 3",
  "A little" = " 4",
  "None at all" = " 5",
  "Inapplicable" = "-1",
  "Refused" = "-9"
)



# Candidate ratings: convert the character codes to numbers, then blank out
# negative values (missing-data codes) as NA.
sample$clinton <- local({
  score <- as.numeric(rawData$V161086)
  replace(score, score < 0, NA)
})
sample$trump <- local({
  score <- as.numeric(rawData$V161087)
  replace(score, score < 0, NA)
})

Research Question

Does a correlation exist between a voter’s characteristics and views and that voter’s ratings of the presidential candidates in the 2016 election?

Cases

Each case is a person surveyed. There are 4270 observations and 1290 variables in the data set.

Data Collection

Data was collected by survey.

Type of Study

This is an observational study.

Data Source

Data was retrieved from the ANES website.

Response

The response variable is the respondent’s rating of each candidate (Trump, Clinton), which we will use as a numeric (quantitative) variable.

Explanatory

The explanatory variables are the respondent’s frequency of use of various media (web, print, TV), gender, previous-election voting status, health insurance status, economic review (assessment of the economy), high school graduation status, marital status, belief in building a wall, belief in global warming, support for allowing refugees, frequency of going to church, and temperature (feeling rating) toward the Democratic Party (quantitative).

Relevant summary statistics

# Summarise every prepared column, restricted to respondents who have a
# non-missing rating for both candidates.
summary(filter(sample, !is.na(clinton), !is.na(trump)))
##        ID           sex       votePre     health_ins  heat_to_democ     
##     1   :   1   Male  : 558   Yes :3082   Yes :3835   Length:4208       
##     2   :   1   Female: 616   No  :1110   No  : 367   Class :character  
##     3   :   1   NA's  :3034   NA's:  16   NA's:   6   Mode  :character  
##     4   :   1                                                           
##     5   :   1                                                           
##     6   :   1                                                           
##  (Other):4202                                                           
##                econ_review         high_school            marriage   
##  Don't know          :   3   Inapplicable:3421   Refused      :  25  
##  Refused             :   4   Refused     :   0   Married      :2116  
##  Very good           :  67   Yes         : 651   Widowed      : 295  
##  Good                : 953   No          : 136   Divorced     : 656  
##  Neither good nor bad:1354                       Separated    :  93  
##  Bad                 :1356                       Never married:1023  
##  Very bad            : 471                                           
##                     built_wall                    allow_refugees
##  Oppose a great deal     :1494   Oppose a great deal     :1355  
##  Neither favor nor oppose: 911   Neither favor nor oppose:1171  
##  Favor a great deal      : 871   Oppose a moderate amount: 519  
##  Favor a moderate amount : 405   Favor a moderate amount : 431  
##  Oppose a moderate amount: 345   Favor a great deal      : 386  
##  Oppose a little         :  84   Favor a little          : 158  
##  (Other)                 :  98   (Other)                 : 188  
##     global_warming                   church                   media     
##  Don't know:  31   Inapplicable         :1694   Inapplicable     :  74  
##  Refused   :  19   Refused              :   3   Refused          :   1  
##  Yes       :3410   Every week           : 779   A great deal     : 903  
##  No        : 748   Almost every week    : 533   A lot            :1057  
##                    Once or twice a month: 463   A moderate amount:1376  
##                    A few times a year   : 688   A little         : 727  
##                    Never                :  48   None at all      :  70  
##     clinton           trump       
##  Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:  2.00   1st Qu.:  0.00  
##  Median : 40.00   Median : 30.00  
##  Mean   : 42.07   Mean   : 36.98  
##  3rd Qu.: 70.00   3rd Qu.: 70.00  
##  Max.   :100.00   Max.   :100.00  
## 

High school graduates who believe in global warming

# Long-format candidate ratings for respondents who answered "Yes" to both
# high_school and global_warming: one row per respondent and candidate, with
# the media level carried along for the plots below.
ratings <- sample %>%
  filter(high_school == "Yes", global_warming == "Yes") %>%
  select(media, clinton, trump) %>%
  gather(key = "candidate", value = "rating", clinton, trump)


# Box plot of individual candidate ratings.
# Fill colours and axis labels are matched to candidates by name, so they stay
# correct even if the order or number of candidates in the data changes
# (an unnamed two-colour fill vector only fits when exactly two boxes are
# drawn in alphabetical order).
ratings %>%
  filter(!is.na(rating)) %>%
  ggplot(aes(x = candidate, y = rating, fill = candidate)) +
  # The fill legend would only repeat the x axis, so it is not shown.
  geom_boxplot(show.legend = FALSE) +
  scale_fill_manual(values = c(clinton = "blue", trump = "red")) +
  scale_x_discrete(
    labels = c(clinton = "Hillary Clinton", trump = "Donald Trump")
  ) +
  ggtitle("Respondent Ratings of Candidates") +
  xlab("Candidate") +
  ylab("Rating")

# Trump ratings split by media level, one grey box per level; x-axis labels
# are tilted 45 degrees.
ggplot(
  filter(ratings, candidate == "trump", !is.na(rating)),
  aes(x = media, y = rating)
) +
  geom_boxplot(fill = "grey") +
  labs(
    title = "Respondent Ratings of Donald Trump",
    x = "How frequently watch  Media",
    y = "Rating"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Clinton ratings split by media level, one grey box per level; x-axis labels
# are tilted 45 degrees.
ggplot(
  filter(ratings, candidate == "clinton", !is.na(rating)),
  aes(x = media, y = rating)
) +
  geom_boxplot(fill = "grey") +
  labs(
    title = "Respondent Ratings of Hillary Clinton",
    x = "How frequently watch  Media",
    y = "Rating"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))