# Problem Set 3

# Header ------------------------------------------------------------------

# File Name:          problemset3
# File Purpose:       R code for Problem Set 3
# Author:             Nakadi Yash (University of Pennsylvania)
# Date Created:     2020/06/15
# Hours Worked:     3-4
# Notes:            

# Version Log:
# Version           Date          Notes
# 1.0               2020/06/15    Created

# Install Packages & WD ---------------------------------------------------

# Attach the packages used by this script. suppressMessages() silences the
# messages that library() emits while attaching.
# NOTE(review): nycflights13 does not appear to be used in the visible part
# of this script -- confirm whether it is still needed.
suppressMessages(library("tidyverse"))
suppressMessages(library("dplyr"))
suppressMessages(library("nycflights13"))

# Set the working directory so the relative file name passed to read.csv()
# further down resolves.
# NOTE(review): this is an absolute path on the author's machine -- adjust it
# before running the script anywhere else.
setwd("C:/Users/rnaka/Desktop/Penn Freshman Year")

# Introductory Commands -------------------------------------------------------------

# Include your introductory commands below (set the working directory and load the tidyverse)

# Install a package only when it is missing: an unconditional
# install.packages() call re-downloads the package every time the script runs.
for (pkg in c("dplyr", "tidyverse")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# library() attaches one package per call. Its second positional argument is
# `help`, so `library(tidyverse, dplyr)` did not attach dplyr itself.
library(tidyverse)
library(dplyr)

# NOTE(review): absolute, machine-specific path -- adjust before running on
# another computer.
setwd("C:/Users/rnaka/Desktop/Penn Freshman Year")

# Load the data file "SurveyPoliticalActivism_WashingtonPostSSRS_data_v1_20200608.csv"
dta1 <- read.csv("SurveyPoliticalActivism_WashingtonPostSSRS_data_v1_20200608.csv")

# Examine the data using the appropriate command(s)
# dim() reports the number of rows and columns of dta1.
dim(dta1)

## [1] 1850  204

# colnames() lists the name of every column in dta1.
colnames(dta1)

##   [1] "id"               "halfsamp2"        "timestrt"        
##   [4] "qn1a"             "qn1b"             "activistvar"     
##   [7] "qne"              "scrnend"          "scrnlen"         
##  [10] "qn2"              "q3"               "qn4a"            
##  [13] "qn4aa"            "qn4b"             "qn4ab"           
##  [16] "qn4c"             "qn4ac"            "qn4d"            
##  [19] "qn4ad"            "qn4e"             "qn4ae"           
##  [22] "qn4f"             "qn4af"            "qn4g"            
##  [25] "qn4ag"            "qn4h"             "qn4ah"           
##  [28] "qn4i"             "qn4ai"            "qn5"             
##  [31] "qn6_01"           "qn6_02"           "qn6_03"          
##  [34] "qn6_04"           "qn6_05"           "qn6_06"          
##  [37] "qn6_07"           "qn6_08"           "qn6_09"          
##  [40] "qn6_10"           "qn6_11"           "qn6_12"          
##  [43] "qn6_13"           "qn6_14"           "qn6_15"          
##  [46] "qn6_16"           "qn6_17"           "qn6_18"          
##  [49] "qn6ot_1"          "qn6ot_2"          "qn6ot_3"         
##  [52] "qn6ot_4"          "qn6ot_5"          "qn7"             
##  [55] "qn8a"             "qn8b"             "qn8c"            
##  [58] "qn8d"             "qn8e"             "qn9"             
##  [61] "qn10"             "qn10vb"           "qn11"            
##  [64] "qn12"             "qn13"             "halfsamp"        
##  [67] "qn14a"            "qn14b"            "qn14c"           
##  [70] "qn14d"            "qn14e"            "qn14f"           
##  [73] "qn15a"            "qn15b"            "qn15c"           
##  [76] "qn15d"            "qn15e"            "qn15f"           
##  [79] "qn15g"            "qn16a"            "qn16b"           
##  [82] "qn16c"            "qn16d"            "qn17"            
##  [85] "qn18a"            "qn18b"            "qn18c"           
##  [88] "qn18d"            "qn19"             "qn20a"           
##  [91] "qn20b"            "qn20c"            "qn20d"           
##  [94] "qn21"             "qn22a"            "qn22b"           
##  [97] "qn22c"            "qn22d"            "qn22e"           
## [100] "qn22f"            "qn22g"            "qn22h"           
## [103] "qn22i"            "qn22j"            "qn23"            
## [106] "qn24"             "qn25"             "party5"          
## [109] "qn25a"            "qn25b"            "qn26"            
## [112] "qn27"             "qn28"             "qn29"            
## [115] "qn30"             "qn31"             "qn32"            
## [118] "qn33"             "qn34"             "qn35"            
## [121] "qn36"             "qn37a"            "qn37b"           
## [124] "qn37c"            "qn38"             "qn39"            
## [127] "qn40_1"           "qn40_2"           "qn40_3"          
## [130] "qn40_4"           "qn40_5"           "qn40_6"          
## [133] "qn40ot"           "qn41"             "qn42"            
## [136] "qn43"             "qn44"             "qn45"            
## [139] "qn46"             "qn46a"            "qn47"            
## [142] "qn48_1"           "qn48_2"           "qn48"            
## [145] "rsex"             "gender"           "qn49"            
## [148] "qn50"             "agevar"           "agevar2"         
## [151] "agevar3"          "qn51"             "qn51a"           
## [154] "qn51b"            "qn52"             "qn53"            
## [157] "qn54"             "qn55"             "qn55a"           
## [160] "qn55aot"          "qnd10"            "qnd10a"          
## [163] "qnd11"            "qnd11ot"          "racethn"         
## [166] "racethn2"         "qnd12"            "receduc"         
## [169] "marital"          "income"           "income1"         
## [172] "recincome"        "qnl1"             "qnc1"            
## [175] "qndhh1"           "halfsampc"        "int1"            
## [178] "inct1a"           "qni1"             "qni2"            
## [181] "gov2018"          "gov22018"         "sen2018"         
## [184] "sen22018"         "house2018"        "house22018"      
## [187] "comp2018"         "cstate"           "cregion"         
## [190] "density"          "intvwdate"        "inteview_length" 
## [193] "wt1"              "weight"           "cdc_urban"       
## [196] "activistvar_orig" "timeend"          "length"          
## [199] "lang"             "Division"         "changesex"       
## [202] "hphoneuse"        "USR"              "cdc_USR"

# glimpse() prints one line per column showing its type and first few values.
glimpse(dta1)

## Rows: 1,850
## Columns: 204
## $ id               <int> 10000002, 10000003, 10000004, 10000005, 10000006, ...
## $ halfsamp2        <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ timestrt         <int> 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ qn1a             <chr> "No have not done in the past two years", "Yes don...
## $ qn1b             <chr> "No have not done in the past two years", "No have...
## $ activistvar      <chr> "Non-Activist", "Activist", "Non-Activist", "Non-A...
## $ qne              <chr> "Male", "Female", "Female", "Female", "Female", "M...
## $ scrnend          <int> 67, 118, 54, 75, 58, 57, 69, 71, 132, 39, 66, 87, ...
## $ scrnlen          <int> 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1,...
## $ qn2              <chr> "Most of the time", "Most of the time", "Most of t...
## $ q3               <chr> "No, not an activist", "No, not an activist", "No,...
## $ qn4a             <chr> " ", "No", " ", " ", " ", " ", "No", " ", " ", " "...
## $ qn4aa            <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn4b             <chr> " ", "No", " ", " ", " ", " ", "No", " ", " ", " "...
## $ qn4ab            <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn4c             <chr> " ", "No", " ", " ", " ", " ", "No", " ", " ", " "...
## $ qn4ac            <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn4d             <chr> " ", "No", " ", " ", " ", " ", "No", " ", " ", " "...
## $ qn4ad            <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn4e             <chr> " ", "No", " ", " ", " ", " ", "No", " ", " ", " "...
## $ qn4ae            <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn4f             <chr> " ", "No", " ", " ", " ", " ", "No", " ", " ", " "...
## $ qn4af            <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn4g             <chr> " ", "No", " ", " ", " ", " ", "No", " ", " ", " "...
## $ qn4ag            <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn4h             <chr> " ", "No", " ", " ", " ", " ", "No", " ", " ", " "...
## $ qn4ah            <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn4i             <chr> " ", "No", " ", " ", " ", " ", "No", " ", " ", " "...
## $ qn4ai            <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn5              <chr> " ", "Yes", " ", " ", " ", " ", "No", " ", " ", " ...
## $ qn6_01           <chr> " ", "NOT Black Lives Matter/racial equality", " "...
## $ qn6_02           <chr> " ", "NOT Other equality/rights mentions", " ", " ...
## $ qn6_03           <chr> " ", "NOT President Trump/Trump administration", "...
## $ qn6_04           <chr> " ", "NOT Other political candidates/campaign ment...
## $ qn6_05           <chr> " ", "NOT Other misc. political/government/economi...
## $ qn6_06           <chr> " ", "NOT Education (funding/common core/school ch...
## $ qn6_07           <chr> " ", "NOT Science/march for science", " ", " ", " ...
## $ qn6_08           <chr> " ", "NOT International issues", " ", " ", " ", " ...
## $ qn6_09           <chr> " ", "NOT Labor rights/unions", " ", " ", " ", " "...
## $ qn6_10           <chr> " ", "NOT Taxes", " ", " ", " ", " ", "NOT Taxes",...
## $ qn6_11           <chr> " ", "NOT Healthcare general mentions (Medicare, M...
## $ qn6_12           <chr> " ", "NOT Religious mentions (religious freedom, e...
## $ qn6_13           <chr> " ", "NOT Marijuana legalization", " ", " ", " ", ...
## $ qn6_14           <chr> " ", "NOT Animal rights/wildlife conservations", "...
## $ qn6_15           <chr> " ", "No", " ", " ", " ", " ", "NOT No", " ", " ",...
## $ qn6_16           <chr> " ", "NOT Other", " ", " ", " ", " ", "NOT Other",...
## $ qn6_17           <chr> " ", "NOT Don't know", " ", " ", " ", " ", "NOT Do...
## $ qn6_18           <chr> " ", "NOT Refused", " ", " ", " ", " ", "NOT Refus...
## $ qn6ot_1          <chr> " ", " ", " ", " ", " ", " ", "un rally in support...
## $ qn6ot_2          <chr> " ", " ", " ", " ", " ", " ", "<blank>", " ", " ",...
## $ qn6ot_3          <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn6ot_4          <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn6ot_5          <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn7              <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn8a             <chr> " ", "No", " ", " ", " ", " ", "Yes", " ", " ", " ...
## $ qn8b             <chr> " ", "Yes", " ", " ", " ", " ", "No", " ", " ", " ...
## $ qn8c             <chr> " ", "No", " ", " ", " ", " ", "No", " ", " ", " "...
## $ qn8d             <chr> " ", "No", " ", " ", " ", " ", "No", " ", " ", " "...
## $ qn8e             <chr> " ", "No", " ", " ", " ", " ", "Yes", " ", " ", " ...
## $ qn9              <chr> " ", "Yes", " ", " ", " ", " ", " ", " ", " ", " "...
## $ qn10             <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn10vb           <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn11             <chr> " ", "National level", " ", " ", " ", " ", "Nation...
## $ qn12             <chr> " ", "More active", " ", " ", " ", " ", "Just as a...
## $ qn13             <chr> " ", "17", " ", " ", " ", " ", "18", " ", " ", " "...
## $ halfsamp         <chr> "B", " ", "A", "B", "A", "B", " ", "A", "B", "B", ...
## $ qn14a            <chr> " ", " ", "Not a major reason", " ", "Major reason...
## $ qn14b            <chr> " ", " ", "Not a major reason", " ", "Not a major ...
## $ qn14c            <chr> " ", " ", "Major reason", " ", "Major reason", " "...
## $ qn14d            <chr> "Not a major reason", " ", " ", "Major reason", " ...
## $ qn14e            <chr> "Major reason", " ", " ", "Not a major reason", " ...
## $ qn14f            <chr> "Not a major reason", " ", " ", "Not a major reaso...
## $ qn15a            <chr> "No have not done in the past two years", "Yes hav...
## $ qn15b            <chr> "Yes have done in the past two years", "Yes have d...
## $ qn15c            <chr> "Yes have done in the past two years", "Yes have d...
## $ qn15d            <chr> "Yes have done in the past two years", "Yes have d...
## $ qn15e            <chr> "No have not done in the past two years", "Yes hav...
## $ qn15f            <chr> "No have not done in the past two years", "Yes hav...
## $ qn15g            <chr> "Yes have done in the past two years", "No have no...
## $ qn16a            <chr> "Once or twice a month", "Less than that", "A few ...
## $ qn16b            <chr> "Less than that", "Less than that", "Once or twice...
## $ qn16c            <chr> "Less than that", "Never (Vol.)", "Never (Vol.)", ...
## $ qn16d            <chr> "A few times a week", "Less than that", "Never (Vo...
## $ qn17             <chr> "Local issues", "National issues", "Local issues",...
## $ qn18a            <chr> "About the same", "About the same", "Less", "More"...
## $ qn18b            <chr> "About the same", "About the same", "About the sam...
## $ qn18c            <chr> "About the same", "Don't Know", "Less", "More", "M...
## $ qn18d            <chr> "About the same", "About the same", "More", "More"...
## $ qn19             <chr> "Too far in restricting the right to protest", "Do...
## $ qn20a            <chr> "No, never appropriate", "No, never appropriate", ...
## $ qn20b            <chr> "Yes, ever appropriate", "Yes, ever appropriate", ...
## $ qn20c            <chr> "Yes, ever appropriate", "No, never appropriate", ...
## $ qn20d            <chr> "Yes, ever appropriate", "No, never appropriate", ...
## $ qn21             <chr> "Right to demonstrate peacefully", "Should be bann...
## $ qn22a            <chr> "Oppose", "Oppose", "Oppose", "Oppose", "Oppose", ...
## $ qn22b            <chr> "Support", "Oppose", "Support", "Oppose", "Support...
## $ qn22c            <chr> "Support", "Support", "Support", "Support", "Suppo...
## $ qn22d            <chr> "Support", "Support", "Support", "Support", "Suppo...
## $ qn22e            <chr> "Support", "Support", "Oppose", "Support", "Suppor...
## $ qn22f            <chr> "Oppose", "Oppose", "Oppose", "Support", "Oppose",...
## $ qn22g            <chr> "Oppose", "Oppose", "Oppose", "Oppose", "Oppose", ...
## $ qn22h            <chr> "Support", "Support", "Support", "Support", "Suppo...
## $ qn22i            <chr> "Don't Know", "Support", "Support", "Support", "Su...
## $ qn22j            <chr> "Don't Know", "Support", "Oppose", "Oppose", "Supp...
## $ qn23             <chr> "Enriching", "Enriching", "Enriching", "Enriching"...
## $ qn24             <chr> "An Independent", "No answer/Refused", "A Democrat...
## $ qn25             <chr> "Democratic party", "No answer/Refused", " ", " ",...
## $ party5           <chr> "INDEPENDENT/LEAN DEMOCRAT", "DON'T KNOW/REFUSED",...
## $ qn25a            <chr> " ", " ", " ", "Strong Republican", " ", " ", "Don...
## $ qn25b            <chr> " ", " ", "Strong Democrat", " ", "Strong Democrat...
## $ qn26             <chr> "Somewhat hopeful", "Very fearful", "Very hopeful"...
## $ qn27             <chr> "None at all", "Not very much", "A fair amount", "...
## $ qn28             <chr> "About the same", "About the same", "About the sam...
## $ qn29             <chr> "A fair amount", "None at all", "A fair amount", "...
## $ qn30             <chr> "Not very much", "A fair amount", "A great deal", ...
## $ qn31             <chr> "Disagree strongly", "Neither agree nor disagree",...
## $ qn32             <chr> "Not too well", "Not well at all", "Not too well",...
## $ qn33             <chr> "Somewhat well", "Not too well", "Very well", "Som...
## $ qn34             <chr> "Too much", "Too much", "About the right amount", ...
## $ qn35             <chr> "Too little", "About the right amount", "About the...
## $ qn36             <chr> "I like elected officials who make compromises wit...
## $ qn37a            <chr> "Crossed the line", "Acceptable", "Crossed the lin...
## $ qn37b            <chr> "Crossed the line", "Crossed the line", "Crossed t...
## $ qn37c            <chr> "Crossed the line", "Crossed the line", "Crossed t...
## $ qn38             <chr> "Stay about the same", "Stay about the same", "Sta...
## $ qn39             <chr> "No", "No", "Yes", "No", "No", "Yes", "No", "No", ...
## $ qn40_1           <chr> " ", " ", "NOT Republican Party or Republican cand...
## $ qn40_2           <chr> " ", " ", "Democratic Party or Democratic candidat...
## $ qn40_3           <chr> " ", " ", "NOT Independent candidate", " ", " ", "...
## $ qn40_4           <chr> " ", " ", "NOT Different group", " ", " ", "NOT Di...
## $ qn40_5           <chr> " ", " ", "NOT Don't Know", " ", " ", "NOT Don't K...
## $ qn40_6           <chr> " ", " ", "NOT Refused", " ", " ", "NOT Refused", ...
## $ qn40ot           <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qn41             <chr> "Somewhat disapprove", "Strongly disapprove", "Str...
## $ qn42             <chr> " ", " ", " ", "Follow President Trump's leadershi...
## $ qn43             <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "...
## $ qn44             <chr> "Always", "Always", "Always", "Always", "Always", ...
## $ qn45             <chr> "Absolutely certain to vote", "Absolutely certain ...
## $ qn46             <chr> "Neither (Vol.)", "Don't Know", "Democratic candid...
## $ qn46a            <chr> "Democratic candidate", "Don't Know", " ", "Don't ...
## $ qn47             <chr> "Voted", "Voted", "Voted", "Voted", "Voted", "Vote...
## $ qn48_1           <chr> "2-1", "1-2", "1-2", "2-1", "2-1", "1-2", "1-2", "...
## $ qn48_2           <chr> "4-3", "4-3", "4-3", "3-4", "4-3", "4-3", "3-4", "...
## $ qn48             <chr> "Someone else (Vol.)", "Clinton", "Clinton", "Trum...
## $ rsex             <chr> "Male", "Female", "Female", "Female", "Female", "M...
## $ gender           <chr> "MALE", "FEMALE", "FEMALE", "FEMALE", "FEMALE", "M...
## $ qn49             <chr> "54", "(DO NOT READ) Refused", "70", "70", "70", "...
## $ qn50             <chr> " ", "50-64", " ", " ", " ", " ", " ", " ", " ", "...
## $ agevar           <chr> "50-64", "50-64", "65+", "65+", "65+", "65+", "30-...
## $ agevar2          <chr> "50-64", "50-64", "65+", "65+", "65+", "65+", "40-...
## $ agevar3          <chr> "50-64", "50-64", "65+", "65+", "65+", "65+", "35-...
## $ qn51             <chr> "Moderate", "Moderate", "Moderate", "Moderate", "L...
## $ qn51a            <chr> " ", " ", " ", " ", "Very", " ", " ", " ", " ", " ...
## $ qn51b            <chr> " ", " ", " ", " ", " ", " ", "Very", "Somewhat", ...
## $ qn52             <chr> "Yes, supporter of the Black Lives Matter movement...
## $ qn53             <chr> "No", "Haven't heard of it (Vol.)", "No", "No", "N...
## $ qn54             <chr> "No", "No", "No", "No", "No", "No", "No", "No", "N...
## $ qn55             <chr> "Not employed", "Full-time", "Full-time", "Not emp...
## $ qn55a            <chr> "Retired", " ", " ", "Retired", "Retired", "Retire...
## $ qn55aot          <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qnd10            <chr> "No", "No", "No", "No", "No", "No", "No", "No", "N...
## $ qnd10a           <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qnd11            <chr> "White", "White", "Black or African American", "Wh...
## $ qnd11ot          <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ racethn          <chr> "WHITE, NON-HISPANIC", "WHITE, NON-HISPANIC", "BLA...
## $ racethn2         <chr> "WHITE, NON-HISPANIC", "WHITE, NON-HISPANIC", "BLA...
## $ qnd12            <chr> "Graduate school or more", "Graduate school or mor...
## $ receduc          <chr> "COLLEGE GRAD OR MORE", "COLLEGE GRAD OR MORE", "C...
## $ marital          <chr> "Married", "Married", "Single", "Widowed", "Marrie...
## $ income           <chr> "75 to under 100 thousand", "Refused", "50 to unde...
## $ income1          <chr> " ", "Refused", " ", "Over $100,000", " ", " ", " ...
## $ recincome        <chr> "$50K - $99.9K", "Don't Know/Refused", "$50K - $99...
## $ qnl1             <chr> "Yes respondent or someone else has cell phone in ...
## $ qnc1             <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qndhh1           <chr> "2", "5", "2", "2", "3", "2", "2", "2", "2", "1", ...
## $ halfsampc        <chr> "D", " ", "C", "C", "D", "D", " ", "C", "C", "D", ...
## $ int1             <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ inct1a           <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", ...
## $ qni1             <chr> "Male", "Male", "Female", "Female", "Male", "Male"...
## $ qni2             <chr> "Black non-Hispanic", "White non-Hispanic", "White...
## $ gov2018          <chr> "Toss up", "Toss up", "Not competitive", "Not comp...
## $ gov22018         <chr> "Toss up/Lean", "Toss up/Lean", "Not competitive",...
## $ sen2018          <chr> "Lean", "Not competitive", "Not competitive", "Not...
## $ sen22018         <chr> "Toss up/Lean", "Not competitive", "Not competitiv...
## $ house2018        <chr> "Not comptetitive", "Not comptetitive", "Not compt...
## $ house22018       <chr> "Not competitive", "Not competitive", "Not competi...
## $ comp2018         <chr> "Yes", "Yes", "No", "No", "No", "No", "No", "No", ...
## $ cstate           <chr> "Florida", "Michigan", "Delaware", "New Jersey", "...
## $ cregion          <chr> "South", "North Central", "South", "Northeast", "N...
## $ density          <chr> "4", "4", "4", "Highest", "3", "Highest", "Highest...
## $ intvwdate        <int> 180124, 180124, 180124, 180124, 180124, 180124, 18...
## $ inteview_length  <dbl> 27.5, 23.1, 20.1, 20.6, 20.6, 18.1, 29.3, 21.6, 24...
## $ wt1              <dbl> 0.7595, 0.8912, 0.7595, 0.7595, 0.8912, 0.7595, 0....
## $ weight           <dbl> 1.0023, 0.3072, 0.9798, 0.8714, 1.3534, 0.9606, 0....
## $ cdc_urban        <chr> "Medium metro", "Large fringe metro", "Large fring...
## $ activistvar_orig <chr> "Non-Activist (originally)", "Activist (originally...
## $ timeend          <int> 1657, 1383, 1207, 1236, 1238, 1084, 1755, 1294, 14...
## $ length           <dbl> 27.5, 23.1, 20.1, 20.6, 20.6, 18.1, 29.3, 21.6, 24...
## $ lang             <chr> "English", "English", "English", "English", "Engli...
## $ Division         <chr> "South Atlantic", "East North Central", "South Atl...
## $ changesex        <chr> "sex = rsex", "sex = rsex", "sex = rsex", "sex = r...
## $ hphoneuse        <chr> "Dual HH", "Dual HH", "Dual HH", "Dual HH", "Dual ...
## $ USR              <chr> "Suburban", "Suburban", "Suburban", "Suburban", "S...
## $ cdc_USR          <chr> "Suburban", "Suburban", "Suburban", "Urban", "Subu...

# Create a new data frame with the following columns from the dataset: id,qn1b,qn4f,qn4af,qn4i,qn4ai,qn20a,qn20b,qn20c,qn20d,party5,qn49
# Index with the character vector itself rather than which(colnames %in% ...):
# the columns come back in exactly the order listed here, and a misspelled or
# missing column name raises an error instead of being silently dropped.
keep_cols <- c(
  "id", "qn1b", "qn4f", "qn4af", "qn4i", "qn4ai",
  "qn20a", "qn20b", "qn20c", "qn20d", "party5", "qn49"
)
dta2 <- dta1[, keep_cols]
head(dta2)

##         id                                   qn1b qn4f qn4af qn4i qn4ai
## 1 10000002 No have not done in the past two years                      
## 2 10000003 No have not done in the past two years   No         No      
## 3 10000004 No have not done in the past two years                      
## 4 10000005 No have not done in the past two years                      
## 5 10000006 No have not done in the past two years                      
## 6 10000007 No have not done in the past two years                      
##                   qn20a                 qn20b                 qn20c
## 1 No, never appropriate Yes, ever appropriate Yes, ever appropriate
## 2 No, never appropriate Yes, ever appropriate No, never appropriate
## 3 No, never appropriate No, never appropriate No, never appropriate
## 4 No, never appropriate No, never appropriate No, never appropriate
## 5 Yes, ever appropriate Yes, ever appropriate Yes, ever appropriate
## 6 Yes, ever appropriate Yes, ever appropriate Yes, ever appropriate
##                   qn20d                    party5                  qn49
## 1 Yes, ever appropriate INDEPENDENT/LEAN DEMOCRAT                    54
## 2 No, never appropriate        DON'T KNOW/REFUSED (DO NOT READ) Refused
## 3 No, never appropriate                  DEMOCRAT                    70
## 4 No, never appropriate                REPUBLICAN                    70
## 5 No, never appropriate                  DEMOCRAT                    70
## 6 No, never appropriate                  DEMOCRAT                    73

# Rename the columns to meaningful values
# Hint: Check the codebook for more information on what the columns represent. You may need to go back and forth between the codebook and the data a few times to figure out exactly what each column represents.
# The mapping is keyed by the original column name (old = "new"), so each
# label is attached to its intended column regardless of column order.
column_labels <- c(
  id     = "id",
  qn1b   = "AttendanceatProtests",
  qn4f   = "PoliceProtest",
  qn4af  = "DefendingCriticizingPolice",
  qn4i   = "ConfederateProtest",
  qn4ai  = "SupportingOpposingRemoval",
  qn20a  = "ProtestRallyDisruption",
  qn20b  = "AnthemProtest",
  qn20c  = "FlagBurning",
  qn20d  = "BlockTraffic",
  party5 = "PartyAffiliation",
  qn49   = "Age"
)
# Fail loudly if any expected column is absent rather than mislabelling data.
stopifnot(all(names(column_labels) %in% colnames(dta2)))
colnames(dta2)[match(names(column_labels), colnames(dta2))] <- unname(column_labels)

# Question 1 --------------------------------------------------------------

# Using forcats::fct_count, calculate the proportion of respondents who attended an organized protest, march, or demonstration of any kind in the past two years.
# Recode the two long survey responses to short "No"/"Yes" labels first, then
# let fct_count() report the count (n) and proportion (p) of each level.
attended_protest <- factor(
  dta2$AttendanceatProtests,
  levels = c(
    "No have not done in the past two years",
    "Yes done in the past two years"
  ),
  labels = c("No", "Yes")
)
forcats::fct_count(attended_protest, prop = TRUE)

## # A tibble: 2 x 3
##   f         n     p
##   <fct> <int> <dbl>
## 1 No     1393 0.753
## 2 Yes     457 0.247

# Question 2 --------------------------------------------------------------

# Using base::prop.table, calculate the proportion of members of each political party who attended a protest. Present your results as a nicely formatted table (e.g. "Lean Democrat" and "Lean Republican" should be "Democrat" and "Republican", and so on).
# Hint: Look at the help file for base::prop.table to identify the argument for "margin"

# Collapse the five party5 categories into three; leaners are folded into
# their party. Any response not listed in `levels` becomes NA and is left out
# of the table.
party_group <- factor(
  dta2$PartyAffiliation,
  levels = c(
    "DEMOCRAT", "INDEPENDENT/LEAN DEMOCRAT", "INDEPENDENT/DON'T LEAN",
    "INDEPENDENT/LEAN REPUBLICAN", "REPUBLICAN"
  ),
  labels = c("DEMOCRAT", "DEMOCRAT", "INDEPENDENT", "REPUBLICAN", "REPUBLICAN")
)
attended <- factor(
  dta2$AttendanceatProtests,
  levels = c(
    "No have not done in the past two years",
    "Yes done in the past two years"
  ),
  labels = c("No", "Yes")
)
# deparse.level = 0 keeps the table's dimension names empty, so the printed
# header stays blank. margin = 1 makes each row (party) sum to 1.
base::prop.table(table(party_group, attended, deparse.level = 0), margin = 1)

##              
##                       No        Yes
##   DEMOCRAT    0.61780105 0.38219895
##   INDEPENDENT 0.87586207 0.12413793
##   REPUBLICAN  0.90014265 0.09985735

# Question 3 --------------------------------------------------------------

# What is the average age of those who have attended an organized protest, march, or demonstration of any kind in the past two years? What is the average age of those who did not?
# Hint: You will need to manipulate qn49 in order to answer this question. For those with ages "97 or older," code their age as 97.
# List the respondents whose Age entry is not one of the numbers 1 to 100, to
# see which responses need recoding before Age can be converted to numeric.
dta2 %>%
  filter(!(Age %in% as.character(1:100))) %>%
  select(Age, id)

##                         Age       id
## 1     (DO NOT READ) Refused 10000003
## 2     (DO NOT READ) Refused 10000018
## 3     (DO NOT READ) Refused 10000035
## 4     (DO NOT READ) Refused 10000081
## 5               97 or older 10000095
## 6     (DO NOT READ) Refused 10000128
## 7     (DO NOT READ) Refused 15000012
## 8     (DO NOT READ) Refused 15000058
## 9  (DO NOT READ) Don’t know 15000129
## 10    (DO NOT READ) Refused 30000174
## 11    (DO NOT READ) Refused 30000325
## 12    (DO NOT READ) Refused 30000336
## 13    (DO NOT READ) Refused 30000553
## 14    (DO NOT READ) Refused 30000593
## 15              97 or older 30000675
## 16    (DO NOT READ) Refused 40000032
## 17    (DO NOT READ) Refused 40000057
## 18    (DO NOT READ) Refused 40000062
## 19    (DO NOT READ) Refused 50000292
## 20    (DO NOT READ) Refused 55000028
## 21    (DO NOT READ) Refused 55000048
## 22    (DO NOT READ) Refused 55000281
## 23    (DO NOT READ) Refused 55000340
## 24    (DO NOT READ) Refused 55000388
## 25    (DO NOT READ) Refused 70000050
## 26    (DO NOT READ) Refused 70000187
## 27    (DO NOT READ) Refused 70000217
## 28    (DO NOT READ) Refused 75000064

# Recode the top-coded response by its value, not by hard-coded respondent
# ids, so the step still works if the data are re-ordered or refreshed.
# This must run before the blanking step below, which would otherwise turn
# "97 or older" into NA.
dta2$Age[dta2$Age == "97 or older"] <- "97"
# Every remaining entry that is not a number from 1 to 100 (refusals,
# "don't know") is treated as missing.
dta2$Age[!dta2$Age %in% as.character(1:100)] <- NA
dta2$AgeNumber <- as.numeric(dta2$Age)
# Sanity check: every non-missing numeric age should match its source string.
table(dta2$AgeNumber == dta2$Age)

## 
## TRUE 
## 1824

# Mean age (rounded to a whole year, missing ages ignored) of the respondents
# who gave the supplied answer to the protest-attendance question.
mean_age_for <- function(response) {
  ages <- dta2$AgeNumber[dta2$AttendanceatProtests == response]
  round(mean(ages, na.rm = TRUE))
}

protestAge <- mean_age_for("Yes done in the past two years")
nonprotestage <- mean_age_for("No have not done in the past two years")
cat("Average age for protestor:", protestAge)

## Average age for protestor: 51

cat("Average age for non-protestor:", nonprotestage)

## Average age for non-protestor: 56

# Question 4 --------------------------------------------------------------

# Of those who attended an organized protest, march, or demonstration of any kind in the past two years, what proportion attended an event criticizing police conduct and interactions with citizens OR supporting the removal of Confederate monuments from government property?
# To answer this question, write an if/else statement within a loop to create a new binary variable called "RacialJusticeEvent"; your loop will have an index "for(i in which(...))" (you need to fill in the dots)
# Hint: Your answer should employ variables qn4af and qn4ai

# Start with NA everywhere; only rows for respondents who attended a protest
# are filled in, so everyone else stays NA and is left out of the table below.
dta2$RacialJusticeEvent <- NA
protester_rows <- which(dta2$AttendanceatProtests == "Yes done in the past two years")
for (idx in protester_rows) {
  # The flag is 1 when either event type matches, 0 otherwise.
  if (dta2$DefendingCriticizingPolice[idx] == "To CRITICIZE police conduct") {
    event_flag <- 1
  } else if (dta2$SupportingOpposingRemoval[idx] == "To support the removal of Confederate monuments") {
    event_flag <- 1
  } else {
    event_flag <- 0
  }
  dta2$RacialJusticeEvent[idx] <- event_flag
}
# 1 represents the value we are looking for (approx. 27.57%)
prop.table(table(dta2$RacialJusticeEvent))

## 
##         0         1 
## 0.7242888 0.2757112

# A Note ------------------------------------------------------------------

# Due to the way that the data was collected, the answer to Question 4 is the proportion of protesters who attended either:
# a) a POLITICAL RALLY, speech, or campaign event
# or b) an organized PROTEST, march, or demonstration
# criticizing police conduct or supporting the removal of Confederate statues. We cannot calculate the proportion of protesters who attended *only* protests about these issues, but our analysis is nevertheless a useful back-of-the-envelope approximation.

# This is why it is so important to think about the structure of your data BEFORE you collect it. Will you be able to use your data to answer the questions that you are interested in? No amount of statistical or computational wizardry can make up for bad data ("garbage in = garbage out").

# Question 5 --------------------------------------------------------------

# Using tidyr::pivot_longer, collapse variables qn20a,qn20b,qn20c,qn20d into two columns called "ProtestActivities" and "EverAppropriate". Your columns should indicate whether the respondent believes it is ever appropriate to disrupt another group's rally, kneel during the national anthem, burn the American flag, or block cars from driving on a road or highway. Save your result as a new object.
# Hint: For each id number in the data frame, there will be four rows.

# Reshape to one row per respondent per activity, then add a factor version
# of the answer.
# Convert EverAppropriate to a factor class variable
newobj <- dta2 %>%
  tidyr::pivot_longer(
    cols = c("ProtestRallyDisruption", "AnthemProtest", "FlagBurning", "BlockTraffic"),
    names_to = "ProtestActivities",
    values_to = "EverAppropriate"
  ) %>%
  mutate(
    EverAppropriateFactor = factor(
      EverAppropriate,
      levels = c("Yes, ever appropriate", "No, never appropriate"),
      labels = c("Yes", "No")
    )
  )

# Use tidyverse commands to create a data frame with the number of protest activities that each respondent believes is ever appropriate. Save your data frame as a new object with two columns: an id column and a column representing the number of protest activities that person views as ever appropriate.
# Hint: You will need to include the ".drop=F" argument in dplyr::group_by. Why do we need to include this argument?
# .drop = FALSE keeps the empty id x "Yes" groups, so respondents who answered
# "Yes" to nothing get a row with a count of 0 instead of disappearing.
# .groups = "drop" returns an ungrouped tibble, so later verbs are not
# silently applied per id.
dta3 <- newobj %>%
  group_by(id, EverAppropriateFactor, .drop = FALSE) %>%
  summarize(EventsToProtest = n(), .groups = "drop") %>%
  filter(EverAppropriateFactor == "Yes") %>%
  select(id, EventsToProtest)

## `summarise()` regrouping output by 'id' (override with `.groups` argument)

dta3

## # A tibble: 1,850 x 2
## # Groups:   id [1,850]
##          id EventsToProtest
##       <int>           <int>
##  1 10000002               3
##  2 10000003               1
##  3 10000004               0
##  4 10000005               0
##  5 10000006               3
##  6 10000007               3
##  7 10000008               0
##  8 10000009               0
##  9 10000010               2
## 10 10000011               0
## # ... with 1,840 more rows

# Merge the number of activities that a respondent believes are ever appropriate into your original data frame (in which each survey respondent is represented by one row).
# Hint: Keep all of the rows in the original data frame in your merge.
# merge() defaults to an inner join; all.x = TRUE makes it a left join so
# every row of dta2 is kept even if an id has no match in dta3.
dta2 <- merge(dta2, dta3, by = "id", all.x = TRUE)

# What is the median number of protest activities that survey respondents believe are ever appropriate?
# 1
# na.rm = TRUE so an unmatched respondent (NA count) cannot turn the median
# into NA.
median(dta2$EventsToProtest, na.rm = TRUE)

## [1] 1

# Problem Set 3

# Yash Nakadi

# 6/15/2020