Turk data
## Load the Turk survey export and keep only the response columns.
##  - drop the first three data rows
##    (NOTE(review): presumably extra header/metadata rows from the survey
##    export -- confirm against turk_data.csv)
##  - keep columns 18 through 36 by position
##  - add a running row id; seq_len(n()) is used instead of 1:n() so a
##    zero-row input yields an empty id rather than c(1, 0)
##  - lower-case the values of Sex, then convert every column (id included)
##    to a factor; as.factor is passed directly because funs() is deprecated
td <- read_csv("turk_data.csv") %>%
  slice(-(1:3)) %>%
  select(18:36) %>%
  mutate(id = seq_len(n())) %>%
  mutate(Sex = tolower(Sex)) %>%
  mutate_all(as.factor)

## Column names are lower-cased last, so the step above still sees `Sex`.
names(td) <- tolower(names(td))
Pew data
## Read the Pew survey table and align its column names with the Turk
## summary: `var` becomes `variable`, `response` becomes `value`. The
## variable labels are lower-cased and stored as a factor.
pew <- read_csv("pewsurvey.csv") %>%
  rename(value = response, variable = var) %>%
  mutate(variable = as.factor(tolower(variable)))
## Prepare the Turk responses for comparison:
##  - drop columns 16 through 20 (by position)
##  - rename the question columns to the q.* style labels
##  - recode intmob: "yes" -> "use internet", anything else ->
##    "do not use internet" (missing values stay missing)
munged.props <- td %>%
  select(-(16:20)) %>%
  rename(
    q.1    = pol1,
    q.1a   = pol1aa,
    q.19f2 = pol19f2,
    q.20f1 = pol20f1,
    q.30f2 = pol30f2,
    q.62f1 = pol62f1a,
    q20    = techq20
  ) %>%
  mutate(
    intmob = ifelse(intmob != "yes", "do not use internet", "use internet")
  )
## Unweighted response proportions per question.
## Reshape to long form (one row per answer), count each
## (variable, value) pair, and divide by the total count within the same
## variable. The "attention" item is dropped afterwards, and both keys are
## lower-cased so they can be matched against the Pew table.
raw.props <- munged.props %>%
  gather(variable, value) %>%
  count(variable, value) %>%
  group_by(variable) %>%
  mutate(obs.prop = n / sum(n)) %>%
  ungroup() %>%
  filter(variable != "attention") %>%
  mutate(
    variable = as.factor(tolower(variable)),
    value = tolower(value)
  )
## Combine observed proportions with the Pew estimates.
## A full join keeps answer options that appear in only one source; rows
## without a response value are removed, options never observed in the
## Turk sample get an observed proportion of 0, and the Pew figures are
## divided by 100 to put them on the same 0-1 scale.
raw.d <- raw.props %>%
  full_join(pew, by = c("value", "variable")) %>%
  filter(!is.na(value)) %>%
  arrange(variable) %>%
  mutate(
    pew.estimates = pew.estimates / 100,
    obs.prop = replace(obs.prop, is.na(obs.prop), 0)
  )
## what's up with q.13
## Plot
## Scatter of observed (x) against Pew (y) proportions; the identity line
## marks where the two would agree exactly. The y axis is fixed to [0, 1].
ggplot(data = raw.d, mapping = aes(x = obs.prop, y = pew.estimates)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0) +
  ylim(0, 1) +
  labs(title = "raw unweighted estimates") +
  theme_bw()
Correlations
## Correlation test between the observed Turk proportions and the Pew
## estimates, using cor.test's default settings.
cor.test(raw.d$obs.prop, raw.d$pew.estimates)
##
## Pearson's product-moment correlation
##
## data: raw.d$obs.prop and raw.d$pew.estimates
## t = 4.6907, df = 38, p-value = 3.472e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3624086 0.7715264
## sample estimates:
## cor
## 0.6055546
## Load the population table, convert the four categorical columns to
## factors in place, and drop the DATE and STATE columns.
## mutate_at(vars(...), as.factor) replaces the deprecated
## mutate_each(funs(...)) form; the same columns are overwritten in place.
pops <- read_csv("population.csv") %>%
  mutate_at(vars(RACE5, HISP, STNAME, SEX), as.factor) %>%
  select(-DATE, -STATE)
## Summarise every column of pops with skim() and render the result as a
## table.
kable(skim(pops))
| var | type | stat | level | value |
|---|---|---|---|---|
| AGE | integer | missing | .all | 0.00000 |
| AGE | integer | complete | .all | 87720.00000 |
| AGE | integer | n | .all | 87720.00000 |
| AGE | integer | mean | .all | 42.50000 |
| AGE | integer | sd | .all | 24.82452 |
| AGE | integer | min | .all | 0.00000 |
| AGE | integer | median | .all | 42.50000 |
| AGE | integer | quantile | 25% | 21.00000 |
| AGE | integer | quantile | 75% | 64.00000 |
| AGE | integer | max | .all | 85.00000 |
| AGE | integer | hist | ▇▇▇▇▇▇▇▇▇▇ | 0.00000 |
| SEX | factor | missing | .all | 0.00000 |
| SEX | factor | complete | .all | 87720.00000 |
| SEX | factor | n | .all | 87720.00000 |
| SEX | factor | count | Female | 43860.00000 |
| SEX | factor | count | Male | 43860.00000 |
| SEX | factor | count | NA | 0.00000 |
| SEX | factor | n_unique | .all | 2.00000 |
| RACE5 | factor | missing | .all | 0.00000 |
| RACE5 | factor | complete | .all | 87720.00000 |
| RACE5 | factor | n | .all | 87720.00000 |
| RACE5 | factor | count | AIAN alone or in combination | 17544.00000 |
| RACE5 | factor | count | Asian alone or in combination | 17544.00000 |
| RACE5 | factor | count | Black alone or in combination | 17544.00000 |
| RACE5 | factor | count | NHPI alone or in combination | 17544.00000 |
| RACE5 | factor | count | White alone or in combination | 17544.00000 |
| RACE5 | factor | count | NA | 0.00000 |
| RACE5 | factor | n_unique | .all | 5.00000 |
| HISP | factor | missing | .all | 0.00000 |
| HISP | factor | complete | .all | 87720.00000 |
| HISP | factor | n | .all | 87720.00000 |
| HISP | factor | count | Hispanic | 43860.00000 |
| HISP | factor | count | Non-Hispanic | 43860.00000 |
| HISP | factor | count | NA | 0.00000 |
| HISP | factor | n_unique | .all | 2.00000 |
| STNAME | factor | missing | .all | 0.00000 |
| STNAME | factor | complete | .all | 87720.00000 |
| STNAME | factor | n | .all | 87720.00000 |
| STNAME | factor | count | Alabama | 1720.00000 |
| STNAME | factor | count | Alaska | 1720.00000 |
| STNAME | factor | count | Arizona | 1720.00000 |
| STNAME | factor | count | Arkansas | 1720.00000 |
| STNAME | factor | count | California | 1720.00000 |
| STNAME | factor | count | Colorado | 1720.00000 |
| STNAME | factor | count | Connecticut | 1720.00000 |
| STNAME | factor | count | Delaware | 1720.00000 |
| STNAME | factor | count | District of Columbia | 1720.00000 |
| STNAME | factor | count | Florida | 1720.00000 |
| STNAME | factor | count | Georgia | 1720.00000 |
| STNAME | factor | count | Hawaii | 1720.00000 |
| STNAME | factor | count | Idaho | 1720.00000 |
| STNAME | factor | count | Illinois | 1720.00000 |
| STNAME | factor | count | Indiana | 1720.00000 |
| STNAME | factor | count | Iowa | 1720.00000 |
| STNAME | factor | count | Kansas | 1720.00000 |
| STNAME | factor | count | Kentucky | 1720.00000 |
| STNAME | factor | count | Louisiana | 1720.00000 |
| STNAME | factor | count | Maine | 1720.00000 |
| STNAME | factor | count | Maryland | 1720.00000 |
| STNAME | factor | count | Massachusetts | 1720.00000 |
| STNAME | factor | count | Michigan | 1720.00000 |
| STNAME | factor | count | Minnesota | 1720.00000 |
| STNAME | factor | count | Mississippi | 1720.00000 |
| STNAME | factor | count | Missouri | 1720.00000 |
| STNAME | factor | count | Montana | 1720.00000 |
| STNAME | factor | count | Nebraska | 1720.00000 |
| STNAME | factor | count | Nevada | 1720.00000 |
| STNAME | factor | count | New Hampshire | 1720.00000 |
| STNAME | factor | count | New Jersey | 1720.00000 |
| STNAME | factor | count | New Mexico | 1720.00000 |
| STNAME | factor | count | New York | 1720.00000 |
| STNAME | factor | count | North Carolina | 1720.00000 |
| STNAME | factor | count | North Dakota | 1720.00000 |
| STNAME | factor | count | Ohio | 1720.00000 |
| STNAME | factor | count | Oklahoma | 1720.00000 |
| STNAME | factor | count | Oregon | 1720.00000 |
| STNAME | factor | count | Pennsylvania | 1720.00000 |
| STNAME | factor | count | Rhode Island | 1720.00000 |
| STNAME | factor | count | South Carolina | 1720.00000 |
| STNAME | factor | count | South Dakota | 1720.00000 |
| STNAME | factor | count | Tennessee | 1720.00000 |
| STNAME | factor | count | Texas | 1720.00000 |
| STNAME | factor | count | Utah | 1720.00000 |
| STNAME | factor | count | Vermont | 1720.00000 |
| STNAME | factor | count | Virginia | 1720.00000 |
| STNAME | factor | count | Washington | 1720.00000 |
| STNAME | factor | count | West Virginia | 1720.00000 |
| STNAME | factor | count | Wisconsin | 1720.00000 |
| STNAME | factor | count | Wyoming | 1720.00000 |
| STNAME | factor | count | NA | 0.00000 |
| STNAME | factor | n_unique | .all | 51.00000 |
| POP | integer | missing | .all | 0.00000 |
| POP | integer | complete | .all | 87720.00000 |
| POP | integer | n | .all | 87720.00000 |
| POP | integer | mean | .all | 3735.17396 |
| POP | integer | sd | .all | 11283.22860 |
| POP | integer | min | .all | 0.00000 |
| POP | integer | median | .all | 182.00000 |
| POP | integer | quantile | 25% | 29.00000 |
| POP | integer | quantile | 75% | 1541.00000 |
| POP | integer | max | .all | 291539.00000 |
| POP | integer | hist | ▇▁▁▁▁▁▁▁▁▁ | 0.00000 |