Insert your name here Insert date here
## # A tibble: 4 × 2
## harass5 n
## <chr> <int>
## 1 Does not apply (i do not have a job/superior/co-worker) 96
## 2 No 1136
## 3 Yes 237
## 4 <NA> 1398
# Percentage of respondents reporting workplace harassment, restricted to
# those who gave a substantive "Yes"/"No" answer.
# `%in%` never returns NA, so missing answers are dropped together with the
# "Does not apply" category, without depending on the exact spelling of that
# long label.
gss %>%
  filter(harass5 %in% c("Yes", "No")) %>%
  count(harass5) %>%
  mutate(percent = n / sum(n) * 100)
## # A tibble: 2 × 3
## harass5 n percent
## <chr> <int> <dbl>
## 1 No 1136 82.7
## 2 Yes 237 17.3
# Histogram of weekly email time, using 60-minute (one-hour) bins.
ggplot(data = gss16, mapping = aes(x = email)) +
  geom_histogram(binwidth = 60) +
  ggtitle("Minutes Spent on Email Weekly") +
  xlab("Minutes per week") +
  ylab("Count")
## Warning: Removed 1218 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Centre of the weekly email-time distribution; missing responses are
# excluded from both statistics.
summarise(
  gss16,
  mean_email = mean(email, na.rm = TRUE),
  median_email = median(email, na.rm = TRUE)
)
## # A tibble: 1 × 2
## mean_email median_email
## <dbl> <dbl>
## 1 417. 120
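The median is a better measure of the typical amount of time spent on email because the distribution is strongly right-skewed: the mean (about 417 minutes per week) is far larger than the median (120 minutes per week), which indicates that it is being pulled upward by a relatively small number of people who spend extremely large amounts of time on email each week.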
# Flag respondents who use Snapchat or Instagram.
#   "Yes" - uses at least one of the two
#   "No"  - uses neither
#   NA    - every remaining combination (e.g. both answers missing)
gss16 <- mutate(
  gss16,
  snap_insta = case_when(
    snapchat == "Yes" | instagrm == "Yes" ~ "Yes",
    snapchat == "No" & instagrm == "No" ~ "No",
    TRUE ~ NA_character_
  )
)
## # A tibble: 2 × 3
## snap_insta n percent
## <chr> <int> <dbl>
## 1 No 858 62.5
## 2 Yes 514 37.5
## # A tibble: 9 × 2
## wrkstat n
## <chr> <int>
## 1 Keeping house 284
## 2 Other 89
## 3 Retired 574
## 4 School 76
## 5 Temp not working 57
## 6 Unempl, laid off 118
## 7 Working fulltime 1321
## 8 Working parttime 345
## 9 <NA> 3
##
## Call:
## lm(formula = email ~ educ + wrkstat + snap_insta, data = gss16)
##
## Residuals:
## Min 1Q Median 3Q Max
## -760.5 -372.7 -161.2 95.4 3355.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -229.736 149.837 -1.533 0.12569
## educ 29.632 9.601 3.087 0.00211 **
## wrkstatOther 33.057 209.470 0.158 0.87465
## wrkstatRetired 68.279 111.051 0.615 0.53887
## wrkstatSchool -123.812 143.981 -0.860 0.39014
## wrkstatTemp not working -73.709 153.948 -0.479 0.63225
## wrkstatUnempl, laid off 118.349 151.242 0.783 0.43419
## wrkstatWorking fulltime 366.840 87.690 4.183 3.26e-05 ***
## wrkstatWorking parttime 18.900 101.632 0.186 0.85253
## snap_instaYes 149.961 52.745 2.843 0.00460 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 642.2 on 669 degrees of freedom
## (2188 observations deleted due to missingness)
## Multiple R-squared: 0.1043, Adjusted R-squared: 0.09227
## F-statistic: 8.657 on 9 and 669 DF, p-value: 2.395e-12
# Refit the email model on complete cases only, so that the fitted values and
# residuals have exactly one entry per row of `model_data`.
model_data <- na.omit(gss16[, c("email", "educ", "wrkstat", "snap_insta")])
email_model <- lm(email ~ educ + wrkstat + snap_insta, data = model_data)
model_data$predicted <- predict(email_model)
model_data$residuals <- resid(email_model)

# Residuals against predicted values; the dashed line marks a residual of 0.
ggplot(model_data, aes(predicted, residuals)) +
  geom_point(alpha = 0.5) +
  geom_hline(yintercept = 0, linetype = "dashed")

# Collapse the four agreement levels of `advfront` into "Yes"/"No".
# Any other response (including missing) matches no branch and becomes NA.
gss16 <- gss16 %>%
  mutate(
    science_support = case_when(
      advfront %in% c("Strongly agree", "Agree") ~ "Yes",
      advfront %in% c("Disagree", "Strongly disagree") ~ "No"
    )
  )

# Collapse the `polviews` labels into three political groups; responses that
# match none of the listed labels become NA.
# NOTE(review): "Slghtly conservative" and "Extrmly conservative" look like
# the data's own abbreviated labels rather than typos - confirm with
# count(gss16, polviews) before changing them.
gss16 <- gss16 %>%
  mutate(
    political_group = case_when(
      polviews %in% c(
        "Extremely liberal",
        "Liberal",
        "Slightly liberal"
      ) ~ "Liberal",
      polviews %in% c(
        "Slghtly conservative",
        "Conservative",
        "Extrmly conservative"
      ) ~ "Conservative",
      polviews == "Moderate" ~ "Moderate"
    )
  )

# Fix the display order so the bars run from Liberal to Conservative instead
# of alphabetically.
gss16$political_group <- factor(
  gss16$political_group,
  levels = c("Liberal", "Moderate", "Conservative")
)

# Within each political group, show the proportion supporting science
# research (position = "fill" scales every bar to a total of 1).
ggplot(
  gss16,
  aes(
    x = political_group,
    fill = science_support
  )
) +
  geom_bar(position = "fill") +
  labs(
    title = "Political Views and Support for Science Research",
    x = "Political Group",
    y = "Proportion",
    fill = "Support Science Research"
  )