library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(haven)
library(readr)
library(ggplot2)
library(sur)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor
anes2020<-read_dta("C:\\Users\\Bryan\\Downloads\\anes2020.dta")
anes2020 %>%
tabyl(V202186)
##  V202186    n      percent
##       -9   67 0.0080917874
##       -7   77 0.0092995169
##       -6  750 0.0905797101
##       -5   16 0.0019323671
##        0  457 0.0551932367
##        1    5 0.0006038647
##        2    2 0.0002415459
##        3    1 0.0001207729
##        4    3 0.0003623188
##        5   23 0.0027777778
##        6    4 0.0004830918
##        7    1 0.0001207729
##        8    2 0.0002415459
##        9    1 0.0001207729
##       10   40 0.0048309179
##       11    1 0.0001207729
##       12    1 0.0001207729
##       15  276 0.0333333333
##       16    1 0.0001207729
##       20   36 0.0043478261
##       23    1 0.0001207729
##       25   25 0.0030193237
##       26    1 0.0001207729
##       30  288 0.0347826087
##       31    1 0.0001207729
##       32    1 0.0001207729
##       33    1 0.0001207729
##       35   12 0.0014492754
##       40  408 0.0492753623
##       42    1 0.0001207729
##       45   38 0.0045893720
##       46    1 0.0001207729
##       48    3 0.0003623188
##       49    1 0.0001207729
##       50 1081 0.1305555556
##       51    2 0.0002415459
##       52    1 0.0001207729
##       54    1 0.0001207729
##       55   23 0.0027777778
##       56    1 0.0001207729
##       57    1 0.0001207729
##       58    2 0.0002415459
##       59    2 0.0002415459
##       60  643 0.0776570048
##       65   65 0.0078502415
##       66    1 0.0001207729
##       68    1 0.0001207729
##       69    3 0.0003623188
##       70  912 0.1101449275
##       75  134 0.0161835749
##       76    3 0.0003623188
##       77    1 0.0001207729
##       78    1 0.0001207729
##       80  181 0.0218599034
##       84    2 0.0002415459
##       85 1061 0.1281400966
##       86    6 0.0007246377
##       87    3 0.0003623188
##       88    6 0.0007246377
##       89    2 0.0002415459
##       90  181 0.0218599034
##       92    2 0.0002415459
##       95   58 0.0070048309
##       96    1 0.0001207729
##       97    1 0.0001207729
##       98    4 0.0004830918
##       99    4 0.0004830918
##      100  851 0.1027777778
##      998    3 0.0003623188
##      999  490 0.0591787440
anes2020 %>%
tabyl(V201600)
##  V201600    n     percent
##       -9   67 0.008091787
##        1 3763 0.454468599
##        2 4450 0.537439614
anes2020 %>%
tabyl(V201507x)
##  V201507x   n     percent
##        -9 354 0.042753623
##        18  34 0.004106280
##        19  49 0.005917874
##        20  45 0.005434783
##        21  52 0.006280193
##        22  57 0.006884058
##        23  74 0.008937198
##        24  92 0.011111111
##        25 107 0.012922705
##        26 107 0.012922705
##        27 135 0.016304348
##        28 119 0.014371981
##        29 130 0.015700483
##        30 142 0.017149758
##        31 112 0.013526570
##        32 117 0.014130435
##        33 125 0.015096618
##        34 141 0.017028986
##        35 151 0.018236715
##        36 141 0.017028986
##        37 148 0.017874396
##        38 152 0.018357488
##        39 149 0.017995169
##        40 141 0.017028986
##        41 150 0.018115942
##        42 114 0.013768116
##        43 116 0.014009662
##        44 113 0.013647343
##        45 118 0.014251208
##        46 119 0.014371981
##        47 106 0.012801932
##        48 106 0.012801932
##        49 125 0.015096618
##        50 152 0.018357488
##        51 126 0.015217391
##        52 115 0.013888889
##        53 118 0.014251208
##        54 122 0.014734300
##        55 138 0.016666667
##        56 127 0.015338164
##        57 135 0.016304348
##        58 145 0.017512077
##        59 151 0.018236715
##        60 170 0.020531401
##        61 139 0.016787440
##        62 156 0.018840580
##        63 154 0.018599034
##        64 157 0.018961353
##        65 178 0.021497585
##        66 168 0.020289855
##        67 141 0.017028986
##        68 142 0.017149758
##        69 156 0.018840580
##        70 127 0.015338164
##        71 145 0.017512077
##        72 142 0.017149758
##        73 150 0.018115942
##        74  94 0.011352657
##        75  93 0.011231884
##        76  90 0.010869565
##        77  82 0.009903382
##        78  64 0.007729469
##        79  61 0.007367150
##        80 401 0.048429952
anes2020 %>%
tabyl(V201508)
##  V201508    n      percent
##       -9   55 0.0066425121
##       -8    1 0.0001207729
##        1 4315 0.5211352657
##        2    7 0.0008454106
##        3  567 0.0684782609
##        4 1221 0.1474637681
##        5  163 0.0196859903
##        6 1951 0.2356280193
anes2020 %>%
tabyl(V201509)
##  V201509    n      percent
##       -9    6 0.0007246377
##       -1 4378 0.5287439614
##        1  745 0.0899758454
##        2 3151 0.3805555556
anes2020 <- filter(anes2020, V202186 >= 0 & V202186 <= 100)
anes2020 <- filter(anes2020, V201600 >= 1)
anes2020 <- filter(anes2020, V201507x >= 18)
anes2020 <- filter(anes2020, V201508 >= 1 & V201508 <= 6)
anes2020 <- filter(anes2020, V201509 >= 1 & V201509 <=2)
anes2020 %>%
ggplot(mapping = aes(V202186))+
geom_histogram()+
ggtitle(label="World Health Organziation Distribution")+
xlab(label="World Health Organziation Feelings")
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

anes2020 %>%
ggplot(mapping = aes(V202186, stat=..density..))+geom_density()+ggtitle(label="World Health Organziation Feelings Distribution")+xlab(label="World Health Organziation Feelings")
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.

qqnorm(anes2020$V202186)

ggplot(anes2020) + geom_point(mapping = aes(x=V201507x, y=V202186))
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.

scatter.smooth(anes2020$V201507x,anes2020$V202186)

lmAgeWHO = lm(V202186~V201507x, data = anes2020)
summary(lmAgeWHO)
## 
## Call:
## lm(formula = V202186 ~ V201507x, data = anes2020)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.870 -14.564   5.472  20.904  36.246 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 65.19452    1.42953  45.606   <2e-16 ***
## V201507x    -0.01801    0.02690  -0.669    0.503    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.51 on 3041 degrees of freedom
## Multiple R-squared:  0.0001473,  Adjusted R-squared:  -0.0001815 
## F-statistic: 0.4481 on 1 and 3041 DF,  p-value: 0.5033
anes2020 %>% 
  ggplot(mapping=aes(y=V202186, x=factor(V201600)))+
  geom_boxplot()+ 
  ggtitle(label="Distribution of World Health Organization Feelings by Gender") +
  xlab(label="World Health Organization")
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.

anes2020$gender.f <- factor(anes2020$V201600)
tapply(anes2020$V202186, anes2020$gender.f, mean)
##        1        2 
## 60.28822 67.11584
contr.treatment(2)
##   2
## 1 0
## 2 1
contrasts(anes2020$gender.f) = contr.treatment(2)
summary(lm(V202186~gender.f, anes2020))
## 
## Call:
## lm(formula = V202186 ~ gender.f, data = anes2020)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -67.116 -17.116   2.884  19.712  39.712 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  60.2882     0.7706   78.24  < 2e-16 ***
## gender.f2     6.8276     1.0055    6.79 1.34e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.31 on 3041 degrees of freedom
## Multiple R-squared:  0.01493,    Adjusted R-squared:  0.01461 
## F-statistic:  46.1 on 1 and 3041 DF,  p-value: 1.343e-11
anes2020$cohab.f <- factor(anes2020$V201509)

contr.treatment(2)
##   2
## 1 0
## 2 1
summary(lm(V202186~cohab.f, anes2020))
## 
## Call:
## lm(formula = V202186 ~ cohab.f, data = anes2020)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -64.34 -14.34   5.66  20.66  35.88 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  64.1176     1.1445  56.023   <2e-16 ***
## cohab.f2      0.2223     1.2716   0.175    0.861    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.52 on 3041 degrees of freedom
## Multiple R-squared:  1.005e-05,  Adjusted R-squared:  -0.0003188 
## F-statistic: 0.03057 on 1 and 3041 DF,  p-value: 0.8612
anes2020$relationship_status <-paste(anes2020$V201509, anes2020$V201508, sep = "" )
summary(anes2020$relationship_status)
##    Length     Class      Mode 
##      3043 character character
tabyl(anes2020$relationship_status)
##  anes2020$relationship_status    n     percent
##                            13   19 0.006243838
##                            14  160 0.052579691
##                            15   22 0.007229708
##                            16  377 0.123890897
##                            23  418 0.137364443
##                            24  846 0.278015117
##                            25   95 0.031219192
##                            26 1106 0.363457115
anes2020$relations_coded <-car::Recode(anes2020$ relationship_status, recodes="'13 to 16' = 'Cohabitating'; '23 to 26' = 'Single'; else=NA", as.factor=T)


anes2020 %>% 
tabyl(relations_coded)
##  relations_coded    n percent valid_percent
##             <NA> 3043       1            NA
anes2020 %>%
  
ggplot(mapping=aes(y=V202186,x=factor(relations_coded)))+
  geom_boxplot()+
  ggtitle(label="World Health Organization Feelings by Relationship Type")+
  xlab(label="World Health Organization")
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.

lmage = lm(V202186~V201507x, data = anes2020)
summary(lmage)
## 
## Call:
## lm(formula = V202186 ~ V201507x, data = anes2020)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.870 -14.564   5.472  20.904  36.246 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 65.19452    1.42953  45.606   <2e-16 ***
## V201507x    -0.01801    0.02690  -0.669    0.503    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.51 on 3041 degrees of freedom
## Multiple R-squared:  0.0001473,  Adjusted R-squared:  -0.0001815 
## F-statistic: 0.4481 on 1 and 3041 DF,  p-value: 0.5033