library(tidyverse)
library(ggplot2)
With the football season approaching, fantasy football has recently become a big part of my life. I could use linear regression to help make fantasy football picks, for example by predicting how many points a player will score in the coming season. Some predictors could be:
previous season points
number of possessions
number of interceptions
number of fumbles
time spent out (inactive) or on injured reserve (IR)
# Locate the US crime data file without calling setwd(): changing the working
# directory affects the whole R session and ties the script to one machine.
# The directory defaults to the original location and can be overridden by
# setting the USCRIME_DATA_DIR environment variable.
default_data_dir <-
  "/Users/tarra/Documents/Intro to Analytics Modeling/Fall2020hw5/data 8.2"
data_dir <- Sys.getenv("USCRIME_DATA_DIR", unset = default_data_dir)
if (!nzchar(data_dir)) {
  # An empty override is treated the same as no override.
  data_dir <- default_data_dir
}
crime_file <- file.path(data_dir, "uscrime.txt")
if (!file.exists(crime_file)) {
  stop("Cannot find data file: ", crime_file, call. = FALSE)
}

# Read the data file; its first row holds the column names.
crime_data <- read.table(crime_file, stringsAsFactors = FALSE, header = TRUE)

# Preview the first rows to make sure the headers were read correctly.
head(crime_data)
## M So Ed Po1 Po2 LF M.F Pop NW U1 U2 Wealth Ineq Prob
## 1 15.1 1 9.1 5.8 5.6 0.510 95.0 33 30.1 0.108 4.1 3940 26.1 0.084602
## 2 14.3 0 11.3 10.3 9.5 0.583 101.2 13 10.2 0.096 3.6 5570 19.4 0.029599
## 3 14.2 1 8.9 4.5 4.4 0.533 96.9 18 21.9 0.094 3.3 3180 25.0 0.083401
## 4 13.6 0 12.1 14.9 14.1 0.577 99.4 157 8.0 0.102 3.9 6730 16.7 0.015801
## 5 14.1 0 12.1 10.9 10.1 0.591 98.5 18 3.0 0.091 2.0 5780 17.4 0.041399
## 6 12.1 0 11.0 11.8 11.5 0.547 96.4 25 4.4 0.084 2.9 6890 12.6 0.034201
## Time Crime
## 1 26.2011 791
## 2 25.2999 1635
## 3 24.3006 578
## 4 29.9012 1969
## 5 21.2998 1234
## 6 20.9995 682
# Fit the full linear model: Crime is the response and every other column of
# crime_data is used as a predictor.
crime_model <- lm(formula = Crime ~ ., data = crime_data)
# Print the coefficient table and the overall fit statistics.
summary(crime_model)
##
## Call:
## lm(formula = Crime ~ ., data = crime_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -395.74 -98.09 -6.69 112.99 512.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.984e+03 1.628e+03 -3.675 0.000893 ***
## M 8.783e+01 4.171e+01 2.106 0.043443 *
## So -3.803e+00 1.488e+02 -0.026 0.979765
## Ed 1.883e+02 6.209e+01 3.033 0.004861 **
## Po1 1.928e+02 1.061e+02 1.817 0.078892 .
## Po2 -1.094e+02 1.175e+02 -0.931 0.358830
## LF -6.638e+02 1.470e+03 -0.452 0.654654
## M.F 1.741e+01 2.035e+01 0.855 0.398995
## Pop -7.330e-01 1.290e+00 -0.568 0.573845
## NW 4.204e+00 6.481e+00 0.649 0.521279
## U1 -5.827e+03 4.210e+03 -1.384 0.176238
## U2 1.678e+02 8.234e+01 2.038 0.050161 .
## Wealth 9.617e-02 1.037e-01 0.928 0.360754
## Ineq 7.067e+01 2.272e+01 3.111 0.003983 **
## Prob -4.855e+03 2.272e+03 -2.137 0.040627 *
## Time -3.479e+00 7.165e+00 -0.486 0.630708
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 209.1 on 31 degrees of freedom
## Multiple R-squared: 0.8031, Adjusted R-squared: 0.7078
## F-statistic: 8.429 on 15 and 31 DF, p-value: 3.539e-07
# Hand-built single observation used as input for the predictions below.
# Column names must match the predictor names used by the fitted models.
crime_test <- data.frame(
  M = 14,
  So = 0,
  Ed = 10,
  Po1 = 12,
  Po2 = 15.5,
  LF = 0.64,
  M.F = 94,
  Pop = 150,
  NW = 1.1,
  U1 = 0.12,
  U2 = 3.6,
  Wealth = 3200,
  Ineq = 20.1,
  Prob = 0.04,
  Time = 39
)
# Predict Crime for the hand-built observation using the full model.
crime_predict <- predict(object = crime_model, newdata = crime_test)
summary(crime_predict)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 155.4 155.4 155.4 155.4 155.4 155.4
# Fit a reduced model that keeps only M, Ed, Po1, U2, Ineq and Prob, the
# predictors flagged at the 0.1 level in the full-model summary.
crime_model_2 <- lm(
  formula = Crime ~ M + Ed + Po1 + U2 + Ineq + Prob,
  data = crime_data
)
# Print the coefficient table and the overall fit statistics.
summary(crime_model_2)
##
## Call:
## lm(formula = Crime ~ M + Ed + Po1 + U2 + Ineq + Prob, data = crime_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -470.68 -78.41 -19.68 133.12 556.23
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5040.50 899.84 -5.602 1.72e-06 ***
## M 105.02 33.30 3.154 0.00305 **
## Ed 196.47 44.75 4.390 8.07e-05 ***
## Po1 115.02 13.75 8.363 2.56e-10 ***
## U2 89.37 40.91 2.185 0.03483 *
## Ineq 67.65 13.94 4.855 1.88e-05 ***
## Prob -3801.84 1528.10 -2.488 0.01711 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 200.7 on 40 degrees of freedom
## Multiple R-squared: 0.7659, Adjusted R-squared: 0.7307
## F-statistic: 21.81 on 6 and 40 DF, p-value: 3.418e-11
# Predict Crime for the same hand-built observation using the reduced model,
# for comparison with the full-model prediction.
crime_predict_2 <- predict(object = crime_model_2, newdata = crime_test)
summary(crime_predict_2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1304 1304 1304 1304 1304 1304