summary(assn8$laborscore)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 50.00 60.00 60.78 80.00 100.00
summary(assn8$policescore)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 60.00 70.00 69.73 85.00 100.00
#police generally receive higher scores than labor unions (mean 69.7 vs 60.8, median 70 vs 60)
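# Sketch: a paired t-test of the two thermometer ratings for the same respondents,
# assuming both columns are complete and on the same 0-100 scale.
t.test(assn8$policescore, assn8$laborscore, paired = TRUE)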
plot(assn8$laborscore,assn8$policescore)

cor(assn8$laborscore,assn8$policescore)
## [1] -0.0822024
# the correlation is weakly negative: respondents who rate labor unions higher tend, on average, to rate the police slightly lower
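# Note: a correlation coefficient is unitless, not a per-unit slope. A sketch of the
# implied bivariate OLS slope, which equals r * sd(policescore) / sd(laborscore):
with(assn8, cor(laborscore, policescore) * sd(policescore) / sd(laborscore))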
# Recode laborscore into intervals
assn8$labor_interval <- cut(
  assn8$laborscore,
  breaks = c(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100),
  include.lowest = TRUE
)
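# Assumption: the functions used in this section come from packages attached earlier
# in the document; if not, they would need to be loaded, e.g.:
library(janitor)   # tabyl()
library(dplyr)     # %>%, group_by(), summarise()
library(ggplot2)   # ggplot(), geom_col(), geom_line()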
tabyl(assn8$labor_interval)
## assn8$labor_interval n percent
## [0,10] 155 0.03209774
## (10,20] 162 0.03354732
## (20,30] 235 0.04866432
## (30,40] 364 0.07537793
## (40,50] 1163 0.24083661
## (50,60] 695 0.14392214
## (60,70] 682 0.14123007
## (70,80] 253 0.05239180
## (80,90] 649 0.13439636
## (90,100] 471 0.09753572
# interesting that respondents generally view labor unions positively (a majority of scores are above 50)
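# Sketch: checking that claim directly -- the share of labor scores above 50
# (na.rm = TRUE kept in case of missing values).
mean(assn8$laborscore > 50, na.rm = TRUE)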
assn8 %>%
  group_by(labor_interval) %>%
  summarise(mean_score = mean(policescore))
## # A tibble: 10 × 2
## labor_interval mean_score
## <fct> <dbl>
## 1 [0,10] 63.8
## 2 (10,20] 75.7
## 3 (20,30] 74.3
## 4 (30,40] 74.9
## 5 (40,50] 70.7
## 6 (50,60] 70.6
## 7 (60,70] 69.2
## 8 (70,80] 66.9
## 9 (80,90] 68.7
## 10 (90,100] 63.4
#not a wild distinction, but apart from the lowest interval, lower labor scores go with higher mean police scores
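# Sketch: the same grouped summary with counts and standard deviations, to gauge how
# large the differences in means are relative to the within-group spread.
assn8 %>%
  group_by(labor_interval) %>%
  summarise(n = n(),
            mean_score = mean(policescore, na.rm = TRUE),
            sd_score = sd(policescore, na.rm = TRUE))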
p <- assn8 %>%
  group_by(labor_interval) %>%
  summarise(mean_score = mean(policescore, na.rm = TRUE)) %>%
  ggplot(aes(x = labor_interval, y = mean_score)) +
  geom_col(fill = "steelblue") +
  labs(title = "Mean Police Score by Labor Feeling Thermometer Interval",
       x = "Labor Feeling Thermometer Interval",
       y = "Mean Police Score") +
  theme_minimal()
p

# again, the plot shows a slight tendency for lower labor feelings to go with higher police feelings
model1 <- lm(policescore ~ laborscore, data = assn8)
model2 <- lm(policescore ~ laborscore + I(laborscore^2), data = assn8)
model3 <- lm(policescore ~ laborscore + I(laborscore^2) + I(laborscore^3), data = assn8)
# Note: the polynomial terms above are not centered; a mean-centered refit is sketched below.
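# A minimal centering sketch (assumption: simple mean-centering of laborscore).
# Centering the predictor before forming the squared and cubed terms reduces
# collinearity among them without changing the fitted values or predictions;
# poly() with orthogonal polynomials is an alternative.
assn8$laborscore_c <- assn8$laborscore - mean(assn8$laborscore, na.rm = TRUE)
model3c <- lm(policescore ~ laborscore_c + I(laborscore_c^2) + I(laborscore_c^3),
              data = assn8)
# model3c re-expresses model3: same fit and predictions, different coefficient values.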
summary(model3)
##
## Call:
## lm(formula = policescore ~ laborscore + I(laborscore^2) + I(laborscore^3),
## data = assn8)
##
## Residuals:
## Min 1Q Median 3Q Max
## -73.43 -13.35 3.64 17.73 35.03
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.530e+01 1.939e+00 33.684 < 2e-16 ***
## laborscore 5.388e-01 1.338e-01 4.026 5.77e-05 ***
## I(laborscore^2) -1.056e-02 2.865e-03 -3.685 0.000231 ***
## I(laborscore^3) 5.138e-05 1.773e-05 2.897 0.003780 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.11 on 4825 degrees of freedom
## Multiple R-squared: 0.01368, Adjusted R-squared: 0.01306
## F-statistic: 22.3 on 3 and 4825 DF, p-value: 2.445e-14
#all polynomial terms are statistically significant, but the raw coefficients of a cubic are hard to read directly; the marginal-effect sketch below helps with interpretation
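# Sketch: the marginal effect of laborscore in the cubic model is
# b1 + 2*b2*x + 3*b3*x^2, evaluated here at a few values of laborscore.
b <- unname(coef(model3))
marg <- function(x) b[2] + 2 * b[3] * x + 3 * b[4] * x^2
marg(c(0, 50, 100))
# With the estimates above this is positive near 0, slightly negative around 50, and
# close to zero near 100, i.e. the fitted curve rises at first and then drifts down.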
# Generate predictions
assn8$pred1 <- predict(model1)
assn8$pred2 <- predict(model2)
assn8$pred3 <- predict(model3)
p <- ggplot(assn8, aes(x = laborscore)) +
  geom_line(aes(y = pred1, color = "Linear"), linewidth = 1) +
  geom_line(aes(y = pred2, color = "Quadratic"), linewidth = 1) +
  geom_line(aes(y = pred3, color = "Cubic"), linewidth = 1) +
  labs(title = "Predicted Values from Polynomial Models",
       x = "laborscore",
       y = "Predicted policescore",
       color = "Model") +
  theme_minimal()
p
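# Sketch: a nested-model comparison to check whether the quadratic and cubic terms
# add explanatory power beyond the simpler specifications fit above.
anova(model1, model2, model3)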

cor(assn8$laborscore, assn8$policescore)
## [1] -0.0822024
cor(assn8$laborscore, assn8$policescore, method = "spearman")
## [1] -0.08167769
cor(assn8$laborscore, assn8$policescore, method = "kendall")
## [1] -0.06230617
#not much variation among the three methods; all are slightly negative
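# Sketch: with roughly 4,800 observations even a small correlation can be
# statistically distinguishable from zero; cor.test() reports the test and a
# confidence interval for the Pearson correlation.
cor.test(assn8$laborscore, assn8$policescore)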