hw3

library(dplyr)
library(ggplot2)
library(tidyverse)
library(psych)
library(magrittr)
library(knitr)
library(kableExtra)
library(readr)

politics_media <- read_csv("~/2nd round of hell/politics_media.csv")

politics = politics_media %>% 
  select(lrscale, stflife, polintr, stfdem, stfgov, trstprl, trstplt, trstprt, cptppola, trstep, trstun, stfeco, stfhlth, stfedu, stflife)

politics %>% 
  filter(lrscale != 77) %>% 
  filter(lrscale != 88) %>% 
  filter(lrscale != 99 )

## # A tibble: 2,276 x 14
##    lrscale stflife polintr stfdem stfgov trstprl trstplt trstprt cptppola
##      <int>   <int>   <int>  <int>  <int>   <int>   <int>   <int>    <int>
##  1       6       4       3      7      5       4       5       6        3
##  2       7       6       3      7      5       7       7       3        4
##  3       5       6       2      7      6       7       2       0        3
##  4       6       4       3      6      5       7       5       6        2
##  5       5       6       3      7      6       6       6       7        3
##  6       7       5       3      6      6       6       6       3        2
##  7       6       7       2      6      7       5       7       7        2
##  8       8       5       3      5      6       5       7       3        3
##  9       7       7       2      8      7       6       7       5        3
## 10       8       6       2      5      6       5       3       2        4
## # … with 2,266 more rows, and 5 more variables: trstep <int>,
## #   trstun <int>, stfeco <int>, stfhlth <int>, stfedu <int>

politics %>%
  filter(stflife != 77) %>%
  filter(stflife != 88) %>%
  filter(stflife != 99)

## # A tibble: 2,752 x 14
##    lrscale stflife polintr stfdem stfgov trstprl trstplt trstprt cptppola
##      <int>   <int>   <int>  <int>  <int>   <int>   <int>   <int>    <int>
##  1       6       4       3      7      5       4       5       6        3
##  2       7       6       3      7      5       7       7       3        4
##  3       5       6       2      7      6       7       2       0        3
##  4       6       4       3      6      5       7       5       6        2
##  5       5       6       3      7      6       6       6       7        3
##  6       7       5       3      6      6       6       6       3        2
##  7       6       7       2      6      7       5       7       7        2
##  8      88       4       4      7      6       2       3       3        2
##  9       8       5       3      5      6       5       7       3        3
## 10       7       7       2      8      7       6       7       5        3
## # … with 2,742 more rows, and 5 more variables: trstep <int>,
## #   trstun <int>, stfeco <int>, stfhlth <int>, stfedu <int>

politics.1 = politics %>% 
  select(lrscale, stflife, polintr, stfdem, stfgov, trstprl, trstplt, trstprt, cptppola, trstep, trstun, stfeco, stfhlth, stfedu, stflife)

politics.1 %>% 
  filter(polintr != 7) %>% 
  filter(polintr != 8) %>% 
  filter(polintr != 9 )

## # A tibble: 2,754 x 14
##    lrscale stflife polintr stfdem stfgov trstprl trstplt trstprt cptppola
##      <int>   <int>   <int>  <int>  <int>   <int>   <int>   <int>    <int>
##  1       6       4       3      7      5       4       5       6        3
##  2       7       6       3      7      5       7       7       3        4
##  3       5       6       2      7      6       7       2       0        3
##  4       6       4       3      6      5       7       5       6        2
##  5       5       6       3      7      6       6       6       7        3
##  6       7       5       3      6      6       6       6       3        2
##  7       6       7       2      6      7       5       7       7        2
##  8      88       4       4      7      6       2       3       3        2
##  9       8       5       3      5      6       5       7       3        3
## 10       7       7       2      8      7       6       7       5        3
## # … with 2,744 more rows, and 5 more variables: trstep <int>,
## #   trstun <int>, stfeco <int>, stfhlth <int>, stfedu <int>

politics.1 %>%
  filter(stfedu != 77) %>%
  filter(stfedu != 88) %>%
  filter(stfedu != 99)

## # A tibble: 2,669 x 14
##    lrscale stflife polintr stfdem stfgov trstprl trstplt trstprt cptppola
##      <int>   <int>   <int>  <int>  <int>   <int>   <int>   <int>    <int>
##  1       6       4       3      7      5       4       5       6        3
##  2       7       6       3      7      5       7       7       3        4
##  3       5       6       2      7      6       7       2       0        3
##  4       6       4       3      6      5       7       5       6        2
##  5       5       6       3      7      6       6       6       7        3
##  6       7       5       3      6      6       6       6       3        2
##  7       6       7       2      6      7       5       7       7        2
##  8      88       4       4      7      6       2       3       3        2
##  9       8       5       3      5      6       5       7       3        3
## 10       7       7       2      8      7       6       7       5        3
## # … with 2,659 more rows, and 5 more variables: trstep <int>,
## #   trstun <int>, stfeco <int>, stfhlth <int>, stfedu <int>

politics.1 %>% 
filter(cptppola != 7) %>% 
filter(cptppola != 8) %>% 
filter(cptppola!= 9 )

## # A tibble: 2,731 x 14
##    lrscale stflife polintr stfdem stfgov trstprl trstplt trstprt cptppola
##      <int>   <int>   <int>  <int>  <int>   <int>   <int>   <int>    <int>
##  1       6       4       3      7      5       4       5       6        3
##  2       7       6       3      7      5       7       7       3        4
##  3       5       6       2      7      6       7       2       0        3
##  4       6       4       3      6      5       7       5       6        2
##  5       5       6       3      7      6       6       6       7        3
##  6       7       5       3      6      6       6       6       3        2
##  7       6       7       2      6      7       5       7       7        2
##  8      88       4       4      7      6       2       3       3        2
##  9       8       5       3      5      6       5       7       3        3
## 10       7       7       2      8      7       6       7       5        3
## # … with 2,721 more rows, and 5 more variables: trstep <int>,
## #   trstun <int>, stfeco <int>, stfhlth <int>, stfedu <int>

polintr - How interested in politics: factor

stfdem - How satisfied with the way democracy works in country: from 0 to 10

politics.1 = politics %>% 
  select(polintr, stflife)
politics.1$polintr <- ifelse (politics.1$polintr == 1, "Very interested",
                    ifelse(politics.1$polintr == 2, "Quite interested", 
                    ifelse(politics.1$polintr == 3, "Hardly interested", "Not interested")))
politics.1$trstprt <-  as.numeric(as.character(politics.1$stflife))
politics.1$polintr <- as.factor(politics.1$polintr)
politics.2 <- data.frame(politics.1$polintr,politics.1$stflife)
str(politics.2)

## 'data.frame':    2757 obs. of  2 variables:
##  $ politics.1.polintr: Factor w/ 4 levels "Hardly interested",..: 1 1 3 1 1 1 3 2 1 3 ...
##  $ politics.1.stflife: int  4 6 6 4 6 5 7 4 5 7 ...

politics.11 = politics.1 %>% 
  filter(politics.1$stflife != 88)

politics.11 = politics.11 %>% 
  filter(politics.11$stflife != 77)

politics.11 = politics.11 %>% 
  filter(politics.11$stflife != 99)


describeBy(politics.11$stflife, politics.11$polintr, mat = TRUE) %>% #create dataframe
  select(polintr = group1, N=n, Mean=mean, SD=sd, Median=median, Min=min, Max=max, 
                Skew=skew, Kurtosis=kurtosis, st.error = se) %>% 
  kable(align=c("lrrrrrrrr"), digits=2, row.names = FALSE,
        caption="Satisfaction with democracy by political preferences") %>% 
  kable_styling(bootstrap_options=c("bordered", "responsive","striped"), full_width = FALSE)

Satisfaction with democracy by political preferences
polintr	N	Mean	SD	Median	Max	Skew	Kurtosis	st.error
Hardly interested	733	7.23	1.90	8	10	-0.97	1.42	0.07
Not interested	747	7.11	2.00	7	10	-0.86	1.02	0.07
Quite interested	964	7.29	1.80	7	10	-1.02	2.00	0.06
Very interested	308	7.54	1.87	8	10	-0.99	1.35	0.11

par(mar = c(3,10,0,3))
barplot(table(politics.1$polintr)/nrow(politics.1)*100, horiz = T, xlim = c(0,60), las = 2)

ggplot()+
  geom_boxplot(data = politics.1, aes(x = polintr, y = stflife)) +
  ylim(c(0,10))

## Warning: Removed 5 rows containing non-finite values (stat_boxplot).

library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

## The following object is masked from 'package:purrr':
## 
##     some

## The following object is masked from 'package:dplyr':
## 
##     recode

leveneTest(politics.11$stflife ~ politics.11$polintr)

## Levene's Test for Homogeneity of Variance (center = median)
##         Df F value Pr(>F)
## group    3  1.8406 0.1376
##       2748

oneway.test(politics.11$stflife ~ politics.11$polintr, var.equal = T)

## 
##  One-way analysis of means
## 
## data:  politics.11$stflife and politics.11$polintr
## F = 3.8929, num df = 3, denom df = 2748, p-value = 0.008659

aov.out <- aov(politics.11$stflife ~ politics.11$polintr) # another function of ANOVA which should be used here
summary(aov.out)

##                       Df Sum Sq Mean Sq F value  Pr(>F)   
## politics.11$polintr    3     42  13.893   3.893 0.00866 **
## Residuals           2748   9807   3.569                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova.res <- residuals(object = aov.out) 
describe(anova.res)

##    vars    n mean   sd median trimmed  mad   min  max range  skew kurtosis
## X1    1 2752    0 1.89  -0.11    0.14 1.48 -7.54 2.89 10.43 -0.96     1.48
##      se
## X1 0.04

shapiro.test(x = anova.res)

## 
##  Shapiro-Wilk normality test
## 
## data:  anova.res
## W = 0.93479, p-value < 2.2e-16

layout(matrix(1:4, 2, 2))
plot(aov.out)

hist(anova.res)