library(haven)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
library(sur)
anes2020<-read_dta("C:\\Users\\BTP\\Downloads\\anes2020.dta")
anes2020$V201507x[anes2020$V201507x <0] <- NA
anes2020$V201600[anes2020$V201600 <0] <- NA
anes2020$V201231x[anes2020$V201231x <0] <- NA
anes2020$V202468x[anes2020$V202468x  <0] <- NA
anes2020$V202144[anes2020$V202144 <0] <- NA
anes2020 <- filter(anes2020, V201507x >= 35 & V201507x < 40)
anes2020 %>%
ggplot(mapping = aes(V202144))+
geom_histogram()+
ggtitle(label="Distribution of Simulated Salaries")+
xlab(label="Salaries")
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 79 rows containing non-finite values (stat_bin).

anes2020 %>%
ggplot(data = anes2020,mapping = aes(V202144, stat=..density..))+geom_density()+ggtitle(label="Distribution of Salaries")+xlab(label="Salaries")
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.
## Warning: Removed 79 rows containing non-finite values (stat_density).

qqnorm(anes2020$V202144)

summary(anes2020$V202144)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00   15.00   35.04   70.00  100.00      79
anes2020 %>%
tabyl (V202144)
##  V202144   n     percent valid_percent
##        0 283 0.381916329   0.427492447
##        1   4 0.005398111   0.006042296
##        5   6 0.008097166   0.009063444
##       10   6 0.008097166   0.009063444
##       15  48 0.064777328   0.072507553
##       20   4 0.005398111   0.006042296
##       30  21 0.028340081   0.031722054
##       33   1 0.001349528   0.001510574
##       35   3 0.004048583   0.004531722
##       40  34 0.045883941   0.051359517
##       45   4 0.005398111   0.006042296
##       50  26 0.035087719   0.039274924
##       51   1 0.001349528   0.001510574
##       55   2 0.002699055   0.003021148
##       59   1 0.001349528   0.001510574
##       60  28 0.037786775   0.042296073
##       65   2 0.002699055   0.003021148
##       66   1 0.001349528   0.001510574
##       70  36 0.048582996   0.054380665
##       72   1 0.001349528   0.001510574
##       75   5 0.006747638   0.007552870
##       80   2 0.002699055   0.003021148
##       82   1 0.001349528   0.001510574
##       85  48 0.064777328   0.072507553
##       90   7 0.009446694   0.010574018
##       95   3 0.004048583   0.004531722
##       96   1 0.001349528   0.001510574
##       98   1 0.001349528   0.001510574
##       99   1 0.001349528   0.001510574
##      100  81 0.109311741   0.122356495
##       NA  79 0.106612686            NA
cumulative.table(anes2020$V202144)
##        0        1        5       10       15       20       30       33 
## 38.19163 38.73144 39.54116 40.35088 46.82861 47.36842 50.20243 50.33738 
##       35       40       45       50       51       55       59       60 
## 50.74224 55.33063 55.87045 59.37922 59.51417 59.78408 59.91903 63.69771 
##       65       66       70       72       75       80       82       85 
## 63.96761 64.10256 68.96086 69.09582 69.77058 70.04049 70.17544 76.65317 
##       90       95       96       98       99      100 
## 77.59784 78.00270 78.13765 78.27260 78.40756 89.33873
The average value for the variable V202144 is 35.0377644.
anes2020$subgroup <-paste(anes2020$V201509, anes2020$V201600, sep = "" )
summary(anes2020$subgroup)
##    Length     Class      Mode 
##       741 character character
anes2020$subgroupcat <-car::Recode(anes2020$ subgroup, recodes="'-11' = 'single men'; '-12' = 'single women'; '11' = 'Cohab men'; '12' = 'Cohab women'; '21' = 'NonCohab men'; '22' = 'NonCohab women'; else=NA", as.factor=T)


anes2020 %>% 
tabyl (subgroupcat)
##     subgroupcat   n     percent valid_percent
##       Cohab men  42 0.056680162    0.05698779
##     Cohab women  41 0.055330634    0.05563094
##    NonCohab men  88 0.118758435    0.11940299
##  NonCohab women 124 0.167341430    0.16824966
##      single men 223 0.300944669    0.30257802
##    single women 219 0.295546559    0.29715061
##            <NA>   4 0.005398111            NA
anes2020 %>%
tabyl(V201509,V201600,show_missing_levels=F,show_na = FALSE) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits=2) %>%
adorn_ns() %>%
knitr::kable()
V201509 1 2
-1 63.17% (223) 57.03% (219)
1 11.90% (42) 10.68% (41)
2 24.93% (88) 32.29% (124)
anes2020 %>%
tabyl(subgroup)
##  subgroup   n     percent
##       -11 223 0.300944669
##       -12 219 0.295546559
##      -1NA   2 0.002699055
##        11  42 0.056680162
##        12  41 0.055330634
##        21  88 0.118758435
##        22 124 0.167341430
##       2NA   2 0.002699055
anes2020 %>% 
ggplot(mapping=aes(y=V202144,x=subgroupcat))+ geom_boxplot()+
ggtitle(label="Distribution of Trump Feelings")+
xlab(label="Relationship status")+ 
ylab(label="Trump Approval Scale")
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.
## Warning: Removed 79 rows containing non-finite values (stat_boxplot).

anes2020 %>%
tabyl(V202468x)
##  V202468x  n    percent valid_percent
##         1 68 0.09176788    0.09315068
##         2 16 0.02159244    0.02191781
##         3 16 0.02159244    0.02191781
##         4 20 0.02699055    0.02739726
##         5 17 0.02294197    0.02328767
##         6 33 0.04453441    0.04520548
##         7 19 0.02564103    0.02602740
##         8 16 0.02159244    0.02191781
##         9 17 0.02294197    0.02328767
##        10 41 0.05533063    0.05616438
##        11 38 0.05128205    0.05205479
##        12 15 0.02024291    0.02054795
##        13 27 0.03643725    0.03698630
##        14 18 0.02429150    0.02465753
##        15 43 0.05802969    0.05890411
##        16 39 0.05263158    0.05342466
##        17 61 0.08232119    0.08356164
##        18 47 0.06342780    0.06438356
##        19 42 0.05668016    0.05753425
##        20 35 0.04723347    0.04794521
##        21 56 0.07557355    0.07671233
##        22 46 0.06207827    0.06301370
##        NA 11 0.01484480            NA
anes2020$houseinc <-as.numeric(anes2020$V202468x, recodes="1 ='5,000'; 2 ='12,000'; 3 ='17,000'; 4 ='22,000'; 5 ='27,000'; 6 ='32,000'; 7 ='37,000'; 8 ='42,000'; 9 ='47,000'; 10 ='52,000'; 11 ='62,000'; 12 ='67,000'; 13 ='72,000'; 14 ='77,000'; 15 ='85,000'; 16 ='95,000'; 17 ='105,000'; 18 ='115,000'; 19 ='135,000'; 20 ='165,000'; 21 ='200,000'; 22 ='250,000'; else=NA", as.factor=T)
scatter.smooth(anes2020$V202144,anes2020$houseinc)

cor(anes2020$V202144, anes2020$houseinc, use = "complete.obs")
## [1] -0.1058004
lmTrump = lm(V202144~houseinc, data = anes2020)
summary(lmTrump)
## 
## Call:
## lm(formula = V202144 ~ houseinc, data = anes2020)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -41.89 -32.38 -17.27  34.72  70.73 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  42.4902     3.2031  13.265  < 2e-16 ***
## houseinc     -0.6009     0.2210  -2.719  0.00672 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 38.09 on 653 degrees of freedom
##   (86 observations deleted due to missingness)
## Multiple R-squared:  0.01119,    Adjusted R-squared:  0.009679 
## F-statistic: 7.392 on 1 and 653 DF,  p-value: 0.006725
anes2020 %>%
tabyl (V201231x)
##  V201231x   n    percent
##         1 158 0.21322537
##         2  93 0.12550607
##         3 113 0.15249663
##         4 109 0.14709852
##         5  80 0.10796221
##         6  69 0.09311741
##         7 119 0.16059379
anes2020 %>%
tabyl(V201231x,subgroupcat,show_missing_levels=F,show_na = FALSE) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits=2) %>%
adorn_ns() %>%
knitr::kable()
V201231x Cohab men Cohab women NonCohab men NonCohab women single men single women
1 21.43% (9) 24.39% (10) 27.27% (24) 27.42% (34) 15.70% (35) 21.00% (46)
2 9.52% (4) 14.63% (6) 10.23% (9) 21.77% (27) 6.28% (14) 15.07% (33)
3 16.67% (7) 19.51% (8) 19.32% (17) 12.90% (16) 16.59% (37) 12.79% (28)
4 14.29% (6) 14.63% (6) 20.45% (18) 16.94% (21) 13.90% (31) 10.50% (23)
5 21.43% (9) 4.88% (2) 4.55% (4) 6.45% (8) 15.25% (34) 10.50% (23)
6 2.38% (1) 9.76% (4) 10.23% (9) 6.45% (8) 12.56% (28) 8.68% (19)
7 14.29% (6) 12.20% (5) 7.95% (7) 8.06% (10) 19.73% (44) 21.46% (47)