Stats_assignment8

# Min, quartiles, median and mean of laborscore
summary(assn8[["laborscore"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   50.00   60.00   60.78   80.00  100.00

# Min, quartiles, median and mean of policescore
summary(assn8[["policescore"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   60.00   70.00   69.73   85.00  100.00

#police generally have better scores than labor unions

# Scatterplot of policescore (y-axis) against laborscore (x-axis)
plot(assn8$laborscore,assn8$policescore)

# Correlation between the two scores (cor() uses Pearson's r by default)
cor(assn8$laborscore,assn8$policescore)

## [1] -0.0822024

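# so the linear association is negative but very weak (r is about -0.08):
# higher labor scores go with only slightly lower police scores on average.
# Note that r measures the strength of the linear association, not the change
# in police score per 1 unit increase in labor score (that is a regression slope)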

# Bin laborscore into ten 10-point intervals. include.lowest = TRUE closes the
# first interval on the left, so a score of exactly 0 lands in [0,10]
assn8[["labor_interval"]] <- cut(
  x = assn8[["laborscore"]],
  breaks = seq(from = 0, to = 100, by = 10),
  include.lowest = TRUE
)

# Frequency table: count and share of rows in each labor-score interval
# (tabyl() is presumably janitor::tabyl -- confirm the package is loaded)
tabyl(assn8$labor_interval)

##  assn8$labor_interval    n    percent
##                [0,10]  155 0.03209774
##               (10,20]  162 0.03354732
##               (20,30]  235 0.04866432
##               (30,40]  364 0.07537793
##               (40,50] 1163 0.24083661
##               (50,60]  695 0.14392214
##               (60,70]  682 0.14123007
##               (70,80]  253 0.05239180
##               (80,90]  649 0.13439636
##              (90,100]  471 0.09753572

# interesting that respondents generally have a positive perception of labor
# unions: about 57% of scores are above 50, and another 24% fall in (40,50]

# Mean police score within each labor-score interval. na.rm = TRUE keeps a
# single missing policescore from turning a whole interval's mean into NA,
# and matches the same summary used for the bar chart.
assn8 %>%
  group_by(labor_interval) %>%
  summarise(mean_score = mean(policescore, na.rm = TRUE))

## # A tibble: 10 × 2
##    labor_interval mean_score
##    <fct>               <dbl>
##  1 [0,10]               63.8
##  2 (10,20]              75.7
##  3 (20,30]              74.3
##  4 (30,40]              74.9
##  5 (40,50]              70.7
##  6 (50,60]              70.6
##  7 (60,70]              69.2
##  8 (70,80]              66.9
##  9 (80,90]              68.7
## 10 (90,100]             63.4

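# not a dramatic difference, but from the (10,20] bin upward, lower labor scores
# generally go with higher police scores; the lowest bin, [0,10], is an
# exception, with one of the lowest mean police scores (63.8)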

# Bar chart of the mean police score (y) within each labor-score interval (x).
# The title names the plotted quantity, i.e. the police score, so that it
# agrees with the y-axis label.
p <- assn8 %>%
  group_by(labor_interval) %>%
  summarise(mean_score = mean(policescore, na.rm = TRUE)) %>%
  ggplot(aes(x = labor_interval, y = mean_score)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Mean Police Score by Labor Feeling Thermometer Interval",
    x = "Feeling Thermometer Interval",
    y = "Mean Police Score"
  ) +
  theme_minimal()

p

# again, can see a slight correlation between lower labor feelings and higher police feelings

# Polynomial regressions of policescore on laborscore:
# linear (model1), quadratic (model2) and cubic (model3)
model1 <- lm(policescore ~ laborscore, data = assn8)
model2 <- lm(policescore ~ laborscore + I(laborscore^2), data = assn8)
model3 <- lm(policescore ~ laborscore + I(laborscore^2) + I(laborscore^3), data = assn8)
# NOTE(review): laborscore enters as raw, uncentered powers. The note here
# originally read "centering", but no centering is applied -- confirm intent

# Coefficient table and fit statistics for the cubic model
summary(model3)

## 
## Call:
## lm(formula = policescore ~ laborscore + I(laborscore^2) + I(laborscore^3), 
##     data = assn8)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -73.43 -13.35   3.64  17.73  35.03 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      6.530e+01  1.939e+00  33.684  < 2e-16 ***
## laborscore       5.388e-01  1.338e-01   4.026 5.77e-05 ***
## I(laborscore^2) -1.056e-02  2.865e-03  -3.685 0.000231 ***
## I(laborscore^3)  5.138e-05  1.773e-05   2.897 0.003780 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.11 on 4825 degrees of freedom
## Multiple R-squared:  0.01368,    Adjusted R-squared:  0.01306 
## F-statistic:  22.3 on 3 and 4825 DF,  p-value: 2.445e-14

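# all terms are statistically significant; the individual coefficients are hard
# to interpret on their own, so read the fitted curve as a whole: predicted
# policescore rises from about 65 at laborscore = 0 to a peak of about 73 near
# laborscore = 34, then falls back to about 65 at laborscore = 100.
# The relationship is weak overall: R-squared is 0.014, so laborscore explains
# only about 1.4% of the variance in policescore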


# Generate predictions for every row of assn8. Passing newdata keeps each
# prediction vector the same length as the data frame even if lm() dropped
# rows with missing values; rows with a missing laborscore get NA instead of
# the assignment failing on a length mismatch.
assn8$pred1 <- predict(model1, newdata = assn8)
assn8$pred2 <- predict(model2, newdata = assn8)
assn8$pred3 <- predict(model3, newdata = assn8)

# Overlay the fitted curves of the linear, quadratic and cubic models against
# laborscore; the constant color labels create the "Model" legend entries
p <- ggplot(data = assn8, mapping = aes(x = laborscore)) +
  geom_line(mapping = aes(y = pred1, color = "Linear"), linewidth = 1) +
  geom_line(mapping = aes(y = pred2, color = "Quadratic"), linewidth = 1) +
  geom_line(mapping = aes(y = pred3, color = "Cubic"), linewidth = 1) +
  ggtitle("Predicted Values from Polynomial Models") +
  xlab("laborscore") +
  ylab("Predicted policescore") +
  labs(color = "Model") +
  theme_minimal()

p

# Pearson correlation between laborscore and policescore
with(assn8, cor(laborscore, policescore, method = "pearson"))

## [1] -0.0822024

# Spearman rank correlation
with(assn8, cor(laborscore, policescore, method = "spearman"))

## [1] -0.08167769

# Kendall's tau
with(assn8, cor(laborscore, policescore, method = "kendall"))

## [1] -0.06230617

#not too much variation between the three; all are negative

Stats_assignment8

Matthew Reyes

2025-11-11