library(haven)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
library(sur)
# Read the Stata file, blank out negative codes in the analysis variables,
# and keep only rows whose V201507x lies in [35, 40).
# NOTE(review): V201507x is presumably respondent age -- confirm in the codebook.
anes2020 <- read_dta("C:\\Users\\BTP\\Downloads\\anes2020.dta") %>%
  mutate(across(
    all_of(c("V201507x", "V201600", "V201231x", "V202468x", "V202144")),
    # Negative values are treated as missing for these variables.
    ~ replace(.x, .x < 0, NA)
  )) %>%
  # Comma-separated conditions are AND-ed; rows where V201507x is NA drop out.
  filter(V201507x >= 35, V201507x < 40)
# Histogram of V202144.
# The title and axis label previously read "Simulated Salaries" / "Salaries";
# they now use the same wording as the boxplot of this variable further down
# ("Trump Feelings" / "Trump Approval Scale").
anes2020 %>%
  # as.numeric() drops the haven_labelled class so ggplot2 can choose a
  # continuous scale without the "Don't know how to automatically pick scale"
  # message.
  ggplot(mapping = aes(x = as.numeric(V202144))) +
  # 30 bins is what stat_bin() was already defaulting to; stating it
  # explicitly keeps the same picture and silences the bin-count message.
  geom_histogram(bins = 30) +
  ggtitle(label = "Distribution of Trump Feelings") +
  xlab(label = "Trump Approval Scale")
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 79 rows containing non-finite values (stat_bin).

# Kernel density estimate of V202144.
# Changes from the previous version:
#   * the data frame is supplied once (by the pipe) instead of both piped and
#     passed again as `data =`;
#   * the `stat = ..density..` entry inside aes() is gone -- `stat` is not an
#     aesthetic, and geom_density() already plots the density on the y axis;
#   * the "Salaries" labels are replaced with the wording used by the boxplot
#     of this same variable further down.
anes2020 %>%
  # as.numeric() drops the haven_labelled class so a continuous scale is
  # chosen without a message.
  ggplot(mapping = aes(x = as.numeric(V202144))) +
  geom_density() +
  ggtitle(label = "Distribution of Trump Feelings") +
  xlab(label = "Trump Approval Scale")
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.
## Warning: Removed 79 rows containing non-finite values (stat_density).

# Normal Q-Q plot of V202144 to eyeball departure from normality.
qqnorm(anes2020[["V202144"]])

# Min / quartiles / mean / max and the NA count for V202144.
summary(anes2020[["V202144"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 15.00 35.04 70.00 100.00 79
# One-way frequency table of V202144 (counts, share of all rows, and share of
# non-missing rows).
tabyl(anes2020, V202144)
## V202144 n percent valid_percent
## 0 283 0.381916329 0.427492447
## 1 4 0.005398111 0.006042296
## 5 6 0.008097166 0.009063444
## 10 6 0.008097166 0.009063444
## 15 48 0.064777328 0.072507553
## 20 4 0.005398111 0.006042296
## 30 21 0.028340081 0.031722054
## 33 1 0.001349528 0.001510574
## 35 3 0.004048583 0.004531722
## 40 34 0.045883941 0.051359517
## 45 4 0.005398111 0.006042296
## 50 26 0.035087719 0.039274924
## 51 1 0.001349528 0.001510574
## 55 2 0.002699055 0.003021148
## 59 1 0.001349528 0.001510574
## 60 28 0.037786775 0.042296073
## 65 2 0.002699055 0.003021148
## 66 1 0.001349528 0.001510574
## 70 36 0.048582996 0.054380665
## 72 1 0.001349528 0.001510574
## 75 5 0.006747638 0.007552870
## 80 2 0.002699055 0.003021148
## 82 1 0.001349528 0.001510574
## 85 48 0.064777328 0.072507553
## 90 7 0.009446694 0.010574018
## 95 3 0.004048583 0.004531722
## 96 1 0.001349528 0.001510574
## 98 1 0.001349528 0.001510574
## 99 1 0.001349528 0.001510574
## 100 81 0.109311741 0.122356495
## NA 79 0.106612686 NA
# Cumulative percentage table of V202144 (sur package).
# NOTE(review): the recorded output tops out at 89.34 rather than 100, which
# suggests the NA rows are counted in the denominator -- confirm this is wanted.
cumulative.table(anes2020[["V202144"]])
## 0 1 5 10 15 20 30 33
## 38.19163 38.73144 39.54116 40.35088 46.82861 47.36842 50.20243 50.33738
## 35 40 45 50 51 55 59 60
## 50.74224 55.33063 55.87045 59.37922 59.51417 59.78408 59.91903 63.69771
## 65 66 70 72 75 80 82 85
## 63.96761 64.10256 68.96086 69.09582 69.77058 70.04049 70.17544 76.65317
## 90 95 96 98 99 100
## 77.59784 78.00270 78.13765 78.27260 78.40756 89.33873
The average value for the variable V202144 is 35.0377644.
# Build a string key by gluing the V201509 code to the V201600 code
# (e.g. "-1" and "1" become "-11"). A missing V201600 is pasted as the
# literal text "NA", giving keys such as "-1NA" and "2NA".
anes2020$subgroup <- paste0(anes2020$V201509, anes2020$V201600)

# Sanity check: length and storage mode of the new character column.
summary(anes2020$subgroup)
## Length Class Mode
## 741 character character
# Turn the pasted subgroup key into a labelled factor. Any key not listed in
# the recode string (including the "...NA" keys) becomes NA.
# NOTE(review): negative codes are treated as missing for the other variables
# in this script, but V201509 == -1 is kept here and labelled "single" --
# confirm against the codebook that -1 is a substantive category.
anes2020$subgroupcat <- car::Recode(
  anes2020$subgroup,
  recodes = "'-11' = 'single men'; '-12' = 'single women'; '11' = 'Cohab men'; '12' = 'Cohab women'; '21' = 'NonCohab men'; '22' = 'NonCohab women'; else=NA",
  as.factor = TRUE
)

# Frequency table of the new factor.
tabyl(anes2020, subgroupcat)
## subgroupcat n percent valid_percent
## Cohab men 42 0.056680162 0.05698779
## Cohab women 41 0.055330634 0.05563094
## NonCohab men 88 0.118758435 0.11940299
## NonCohab women 124 0.167341430 0.16824966
## single men 223 0.300944669 0.30257802
## single women 219 0.295546559 0.29715061
## <NA> 4 0.005398111 NA
# Two-way table of V201509 (rows) by V201600 (columns), shown as column
# percentages with the cell counts in parentheses; NA rows/columns are dropped.
tabyl(
  anes2020, V201509, V201600,
  show_missing_levels = FALSE,
  show_na = FALSE
) %>%
  adorn_percentages(denominator = "col") %>%
  adorn_pct_formatting(digits = 2) %>%
  adorn_ns() %>%
  knitr::kable()
| V201509 | 1 | 2 |
|:--------|:-------------|:-------------|
| -1 | 63.17% (223) | 57.03% (219) |
| 1 | 11.90% (42) | 10.68% (41) |
| 2 | 24.93% (88) | 32.29% (124) |
# Frequency table of the raw pasted key, useful for spotting the keys that
# the recode sends to NA.
tabyl(anes2020, subgroup)
## subgroup n percent
## -11 223 0.300944669
## -12 219 0.295546559
## -1NA 2 0.002699055
## 11 42 0.056680162
## 12 41 0.055330634
## 21 88 0.118758435
## 22 124 0.167341430
## 2NA 2 0.002699055
# Side-by-side boxplots of V202144 for each subgroupcat level.
ggplot(data = anes2020, mapping = aes(x = subgroupcat, y = V202144)) +
  geom_boxplot() +
  labs(
    title = "Distribution of Trump Feelings",
    x = "Relationship status",
    y = "Trump Approval Scale"
  )
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.
## Warning: Removed 79 rows containing non-finite values (stat_boxplot).

# Frequency table of the V202468x category codes (1-22 plus NA).
tabyl(anes2020, V202468x)
## V202468x n percent valid_percent
## 1 68 0.09176788 0.09315068
## 2 16 0.02159244 0.02191781
## 3 16 0.02159244 0.02191781
## 4 20 0.02699055 0.02739726
## 5 17 0.02294197 0.02328767
## 6 33 0.04453441 0.04520548
## 7 19 0.02564103 0.02602740
## 8 16 0.02159244 0.02191781
## 9 17 0.02294197 0.02328767
## 10 41 0.05533063 0.05616438
## 11 38 0.05128205 0.05205479
## 12 15 0.02024291 0.02054795
## 13 27 0.03643725 0.03698630
## 14 18 0.02429150 0.02465753
## 15 43 0.05802969 0.05890411
## 16 39 0.05263158 0.05342466
## 17 61 0.08232119 0.08356164
## 18 47 0.06342780 0.06438356
## 19 42 0.05668016 0.05753425
## 20 35 0.04723347 0.04794521
## 21 56 0.07557355 0.07671233
## 22 46 0.06207827 0.06301370
## NA 11 0.01484480 NA
# Convert the V202468x category code (1-22) into a dollar amount.
#
# The previous version called as.numeric() with `recodes=` and `as.factor=`
# arguments; as.numeric() ignores both, so houseinc silently held the raw
# 1-22 codes instead of dollars. The amounts below are the ones listed in that
# ignored recode string, written as plain numbers so the result is numeric.
# Position i in the vector is the amount for code i.
houseinc_dollars <- c(
    5000,  12000,  17000,  22000,  27000,  32000,  37000,  42000,
   47000,  52000,  62000,  67000,  72000,  77000,  85000,  95000,
  105000, 115000, 135000, 165000, 200000, 250000
)

# match() returns NA for any code outside 1-22 (and for NA), which reproduces
# the intended `else=NA` rule.
anes2020$houseinc <- houseinc_dollars[
  match(as.numeric(anes2020$V202468x), seq_along(houseinc_dollars))
]

# NOTE(review): the correlation and regression output recorded after this
# point was produced with the 1-22 codes and must be regenerated; the slope in
# particular will now be per dollar rather than per category step.

# Scatterplot with a loess smoother: V202144 on the x axis, houseinc on y.
scatter.smooth(anes2020$V202144, anes2020$houseinc)

# Pearson correlation over rows where both variables are present.
cor(anes2020$V202144, anes2020$houseinc, use = "complete.obs")
## [1] -0.1058004
# Ordinary least squares: V202144 regressed on houseinc. Rows with a missing
# value in either variable are dropped by lm().
lmTrump <- lm(V202144 ~ houseinc, data = anes2020)

# Coefficient table, residual summary and fit statistics.
summary(lmTrump)
##
## Call:
## lm(formula = V202144 ~ houseinc, data = anes2020)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.89 -32.38 -17.27 34.72 70.73
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.4902 3.2031 13.265 < 2e-16 ***
## houseinc -0.6009 0.2210 -2.719 0.00672 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.09 on 653 degrees of freedom
## (86 observations deleted due to missingness)
## Multiple R-squared: 0.01119, Adjusted R-squared: 0.009679
## F-statistic: 7.392 on 1 and 653 DF, p-value: 0.006725
# Frequency table of V201231x (codes 1-7 in the recorded output).
tabyl(anes2020, V201231x)
## V201231x n percent
## 1 158 0.21322537
## 2 93 0.12550607
## 3 113 0.15249663
## 4 109 0.14709852
## 5 80 0.10796221
## 6 69 0.09311741
## 7 119 0.16059379
# Two-way table of V201231x (rows) by subgroupcat (columns), shown as column
# percentages with the cell counts in parentheses; NA rows/columns are dropped.
tabyl(
  anes2020, V201231x, subgroupcat,
  show_missing_levels = FALSE,
  show_na = FALSE
) %>%
  adorn_percentages(denominator = "col") %>%
  adorn_pct_formatting(digits = 2) %>%
  adorn_ns() %>%
  knitr::kable()
| V201231x | Cohab men | Cohab women | NonCohab men | NonCohab women | single men | single women |
|:---------|:----------|:------------|:-------------|:---------------|:-----------|:-------------|
| 1 | 21.43% (9) | 24.39% (10) | 27.27% (24) | 27.42% (34) | 15.70% (35) | 21.00% (46) |
| 2 | 9.52% (4) | 14.63% (6) | 10.23% (9) | 21.77% (27) | 6.28% (14) | 15.07% (33) |
| 3 | 16.67% (7) | 19.51% (8) | 19.32% (17) | 12.90% (16) | 16.59% (37) | 12.79% (28) |
| 4 | 14.29% (6) | 14.63% (6) | 20.45% (18) | 16.94% (21) | 13.90% (31) | 10.50% (23) |
| 5 | 21.43% (9) | 4.88% (2) | 4.55% (4) | 6.45% (8) | 15.25% (34) | 10.50% (23) |
| 6 | 2.38% (1) | 9.76% (4) | 10.23% (9) | 6.45% (8) | 12.56% (28) | 8.68% (19) |
| 7 | 14.29% (6) | 12.20% (5) | 7.95% (7) | 8.06% (10) | 19.73% (44) | 21.46% (47) |