library(readr)
# Load the crime series and plot the violent-crime rate against year.
crime <- read_csv("~/Senior Project/crime.csv")
plot(ViolentCrimes ~ Year, data = crime, pch = 19)

# Scatter plot of one slice of the series with its least-squares line on top.
plot_linear_trend <- function(crime_slice) {
  plot(ViolentCrimes ~ Year, data = crime_slice, pch = 19)
  abline(lm(ViolentCrimes ~ Year, data = crime_slice))
}

# Piecewise-linear view of the series in three periods.
# Rows are selected with base indexing because only readr is attached here, so
# the pipe and dplyr::filter() are not available. which() discards rows whose
# Year is NA, which is also what filter() would do.
# Neighbouring periods share their boundary year (1992, and 2014 when Year
# holds whole numbers).
crime1 <- crime[which(crime$Year <= 1992), ]
plot_linear_trend(crime1)

crime2 <- crime[which(crime$Year >= 1992 & crime$Year <= 2014), ]
plot_linear_trend(crime2)

crime3 <- crime[which(crime$Year > 2013), ]
plot_linear_trend(crime3)

# Polynomial fits of the violent-crime rate (y) against year (x).
y <- crime$ViolentCrimes
x <- crime$Year

# Raw powers of x. Only xsq and xcub enter a model in this section (fit3);
# the higher powers are not read by fit10, which builds its own terms with
# poly(). They are still defined because they are script-level variables.
xsq <- x^2
xcub <- x^3
xquar <- x^4
x5 <- x^5
x6 <- x^6
x7 <- x^7
x8 <- x^8
x9 <- x^9
x10 <- x^10

plot(x, y, pch = 19, xlab = "Year", ylab = "Violent Crimes per 100k People")

# Degree 1: straight line.
fit1 <- lm(y ~ x)
abline(fit1, col = "red")

# Evaluation grid: one point per unit of x across the observed range.
xv <- seq(min(x), max(x), 1)

# Degree 3, fitted on raw powers, so the new data must carry the same powers.
# NOTE(review): raw powers of a calendar year are close to collinear;
# poly(x, 3) would be numerically safer but changes the reported coefficients,
# so confirm before switching.
fit3 <- lm(y ~ x + xsq + xcub)
yv <- predict(fit3, list(x = xv, xsq = xv^2, xcub = xv^3))
lines(xv, yv, col = "blue")

# Degree 10, fitted with poly(). predict() rebuilds the polynomial basis from
# x, so x is the only column the new data needs.
fit10 <- lm(y ~ poly(x, 10))
yv <- predict(fit10, data.frame(x = xv))
lines(xv, yv, col = "black")

# Fuel economy (y) against engine horsepower (x) for the Auto data.
# NOTE(review): `Auto` is not created anywhere in the visible script; it is
# presumably the ISLR `Auto` data set and must already be loaded -- confirm.
y <- Auto$mpg
x <- Auto$horsepower

# Raw powers of horsepower. Every power is computed from the current x so that
# none of them carries over a value from an earlier x.
xsq <- x^2
xcub <- x^3
xquar <- x^4
x5 <- x^5
x6 <- x^6
x7 <- x^7
x8 <- x^8
x9 <- x^9
x10 <- x^10

plot(x, y, pch = 20, xlab = "Horsepower", ylab = "Miles Per Gallon")
# Degree 1: straight-line fit of y on x, drawn in red on the current plot.
fit1 <- lm(y ~ x)
summary(fit1)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5710 -3.2592 -0.3435 2.7630 16.9240
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.935861 0.717499 55.66 <2e-16 ***
## x -0.157845 0.006446 -24.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.906 on 390 degrees of freedom
## Multiple R-squared: 0.6059, Adjusted R-squared: 0.6049
## F-statistic: 599.7 on 1 and 390 DF, p-value: < 2.2e-16
abline(fit1, col = "red", lwd = 4)

# Degree 2: quadratic fit on the raw powers x and xsq, drawn in green.
fit2 <- lm(y ~ x + xsq)
summary(fit2)
##
## Call:
## lm(formula = y ~ x + xsq)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.7135 -2.5943 -0.0859 2.2868 15.8961
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.9000997 1.8004268 31.60 <2e-16 ***
## x -0.4661896 0.0311246 -14.98 <2e-16 ***
## xsq 0.0012305 0.0001221 10.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.374 on 389 degrees of freedom
## Multiple R-squared: 0.6876, Adjusted R-squared: 0.686
## F-statistic: 428 on 2 and 389 DF, p-value: < 2.2e-16
# Grid with one point per unit of x over the observed range. The model was fit
# on named raw powers, so the new data supplies the same powers of the grid.
xv <- seq(from = min(x), to = max(x), by = 1)
yv <- predict(fit2, newdata = data.frame(x = xv, xsq = xv^2))
lines(xv, yv, col = "green", lwd = 4)

# Degree 3: cubic fit on the raw powers x, xsq and xcub, drawn in blue.
fit3 <- lm(y ~ x + xsq + xcub)
summary(fit3)
##
## Call:
## lm(formula = y ~ x + xsq + xcub)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.7039 -2.4491 -0.1519 2.2035 15.8159
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.068e+01 4.563e+00 13.298 < 2e-16 ***
## x -5.689e-01 1.179e-01 -4.824 2.03e-06 ***
## xsq 2.079e-03 9.479e-04 2.193 0.0289 *
## xcub -2.147e-06 2.378e-06 -0.903 0.3673
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.375 on 388 degrees of freedom
## Multiple R-squared: 0.6882, Adjusted R-squared: 0.6858
## F-statistic: 285.5 on 3 and 388 DF, p-value: < 2.2e-16
xv <- seq(from = min(x), to = max(x), by = 1)
yv <- predict(fit3, newdata = data.frame(x = xv, xsq = xv^2, xcub = xv^3))
lines(xv, yv, col = "blue", lwd = 4)
# Higher-degree fits built with poly(). Each model's only predictor variable
# is x: predict() regenerates the polynomial basis from it, so the new data
# carries just the x grid and no hand-computed powers.
xv <- seq(min(x), max(x), 1)
grid <- data.frame(x = xv)

# Degree 4, drawn in purple.
fit4 <- lm(y ~ poly(x, 4))
summary(fit4)$r.squared
## [1] 0.6893436
yv <- predict(fit4, grid)
lines(xv, yv, col = "purple", lwd = 4)

# Degree 5, drawn in pink.
fit5 <- lm(y ~ poly(x, 5))
summary(fit5)$r.squared
## [1] 0.696739
yv <- predict(fit5, grid)
lines(xv, yv, col = "pink", lwd = 4)

# Degree 17, drawn in black.
fit17 <- lm(y ~ poly(x, 17))
summary(fit17)
##
## Call:
## lm(formula = y ~ poly(x, 17))
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.7840 -2.4482 -0.1369 2.3586 15.1625
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.4459 0.2113 110.967 < 2e-16 ***
## poly(x, 17)1 -120.1377 4.1833 -28.719 < 2e-16 ***
## poly(x, 17)2 44.0895 4.1833 10.540 < 2e-16 ***
## poly(x, 17)3 -3.9488 4.1833 -0.944 0.345797
## poly(x, 17)4 -5.1878 4.1833 -1.240 0.215702
## poly(x, 17)5 13.2722 4.1833 3.173 0.001635 **
## poly(x, 17)6 -8.5462 4.1833 -2.043 0.041757 *
## poly(x, 17)7 7.9806 4.1833 1.908 0.057190 .
## poly(x, 17)8 2.1727 4.1833 0.519 0.603800
## poly(x, 17)9 -3.9182 4.1833 -0.937 0.349549
## poly(x, 17)10 -2.6146 4.1833 -0.625 0.532346
## poly(x, 17)11 3.5636 4.1833 0.852 0.394834
## poly(x, 17)12 1.1451 4.1833 0.274 0.784446
## poly(x, 17)13 0.6041 4.1833 0.144 0.885262
## poly(x, 17)14 -3.8267 4.1833 -0.915 0.360908
## poly(x, 17)15 13.4922 4.1833 3.225 0.001369 **
## poly(x, 17)16 -14.5099 4.1833 -3.469 0.000584 ***
## poly(x, 17)17 9.6578 4.1833 2.309 0.021506 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.183 on 374 degrees of freedom
## Multiple R-squared: 0.7252, Adjusted R-squared: 0.7127
## F-statistic: 58.07 on 17 and 374 DF, p-value: < 2.2e-16
yv <- predict(fit17, grid)
lines(xv, yv, col = "black", lwd = 4)

library(caTools)
# Validation-set comparison: fit mpg on polynomials of horsepower using the
# training rows, then report the mean squared error on the held-out rows.
# NOTE(review): `Auto` is not created anywhere in the visible script; it is
# presumably the ISLR `Auto` data set and must already be loaded -- confirm.
set.seed(1000)

# Each row is assigned to the training set independently with probability
# 0.75, so the split is roughly (not exactly) 75/25. The mask is named
# is_train so it does not reuse the name of base::sample().
is_train <- sample(
  c(TRUE, FALSE),
  nrow(Auto),
  replace = TRUE,
  prob = c(0.75, 0.25)
)
train <- Auto[is_train, ]
test <- Auto[!is_train, ]

# Degree 1.
testModel <- lm(mpg ~ horsepower, data = train)
pred.test <- predict(testModel, test)
mean((pred.test - test$mpg)^2)
## [1] 24.66337

# Degree 2.
testModel2 <- lm(mpg ~ poly(horsepower, 2), data = train)
pred.test2 <- predict(testModel2, test)
mean((pred.test2 - test$mpg)^2)
## [1] 20.52966

# Degree 3.
testModel3 <- lm(mpg ~ poly(horsepower, 3), data = train)
pred.test3 <- predict(testModel3, test)
mean((pred.test3 - test$mpg)^2)
## [1] 20.59906

# Degree 4.
testModel4 <- lm(mpg ~ poly(horsepower, 4), data = train)
pred.test4 <- predict(testModel4, test)
mean((pred.test4 - test$mpg)^2)
## [1] 20.45034

# Degree 5.
testModel5 <- lm(mpg ~ poly(horsepower, 5), data = train)
pred.test5 <- predict(testModel5, test)
mean((pred.test5 - test$mpg)^2)
## [1] 19.85817

# Degree 7 (degree 6 is not evaluated in this script).
testModel7 <- lm(mpg ~ poly(horsepower, 7), data = train)
pred.test7 <- predict(testModel7, test)
mean((pred.test7 - test$mpg)^2)
## [1] 20.42431

# Degree 8.
testModel8 <- lm(mpg ~ poly(horsepower, 8), data = train)
pred.test8 <- predict(testModel8, test)
mean((pred.test8 - test$mpg)^2)
## [1] 20.74518

# Degree 9.
testModel9 <- lm(mpg ~ poly(horsepower, 9), data = train)
pred.test9 <- predict(testModel9, test)
mean((pred.test9 - test$mpg)^2)
## [1] 20.88395

# Degree 10.
testModel10 <- lm(mpg ~ poly(horsepower, 10), data = train)
pred.test10 <- predict(testModel10, test)
mean((pred.test10 - test$mpg)^2)
## [1] 20.93814