ggplot(NHL, aes(x=Age,y=Cap_Hit)) +
geom_point(col = "blue", size = .5
)
ggplot(NHL, aes(x=Contract_Length,y=Cap_Hit)) +
geom_point(col = "blue", size = .5
)
fmla10<-Cap_Hit~P_per_game+Wt+Draft+Age+Pos+PIM+GP
fmla11<-Cap_Hit~P_per_game+Wt+Draft+Age+Pos+PIMPG
fmla12<-Cap_Hit~P_per_game+Wt+Draft+Age+Pos+PIM
reg10<-lm(fmla10,NHL)
reg11<-lm(fmla11,NHL)
reg12<-lm(fmla12,NHL)
summary(reg10)
##
## Call:
## lm(formula = fmla10, data = NHL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4829161 -989929 -101584 885057 6034866
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4620621 759935 -6.080 1.79e-09 ***
## P_per_game 5506638 215621 25.539 < 2e-16 ***
## Wt 13354 3844 3.474 0.000539 ***
## Draft -90258 23584 -3.827 0.000139 ***
## Age 101439 15782 6.427 2.13e-10 ***
## Pos -530823 117243 -4.528 6.80e-06 ***
## PIM -1166 2941 -0.396 0.691838
## GP 11088 2742 4.043 5.74e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1600000 on 873 degrees of freedom
## Multiple R-squared: 0.62, Adjusted R-squared: 0.6169
## F-statistic: 203.5 on 7 and 873 DF, p-value: < 2.2e-16
summary(reg11)
##
## Call:
## lm(formula = fmla11, data = NHL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5534696 -979541 -44078 889016 5957503
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4800519 747776 -6.420 2.24e-10 ***
## P_per_game 5903724 199234 29.632 < 2e-16 ***
## Wt 13498 3848 3.508 0.000475 ***
## Draft -108843 23482 -4.635 4.11e-06 ***
## Age 121965 15259 7.993 4.16e-15 ***
## Pos -548525 118479 -4.630 4.22e-06 ***
## PIMPG 117624 118162 0.995 0.319797
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1616000 on 874 degrees of freedom
## Multiple R-squared: 0.6117, Adjusted R-squared: 0.6091
## F-statistic: 229.5 on 6 and 874 DF, p-value: < 2.2e-16
summary(reg12)
##
## Call:
## lm(formula = fmla12, data = NHL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5446003 -992462 -52828 859348 5948068
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4523781 766196 -5.904 5.07e-09 ***
## P_per_game 5821757 202799 28.707 < 2e-16 ***
## Wt 12472 3872 3.221 0.00132 **
## Draft -106080 23460 -4.522 6.98e-06 ***
## Age 117561 15404 7.632 6.05e-14 ***
## Pos -544178 118221 -4.603 4.78e-06 ***
## PIM 4841 2560 1.891 0.05899 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1614000 on 874 degrees of freedom
## Multiple R-squared: 0.6129, Adjusted R-squared: 0.6102
## F-statistic: 230.6 on 6 and 874 DF, p-value: < 2.2e-16
ct <- rpart(Cap_Hit ~P_per_game+Wt+Draft+Age+Pos+PIMPG, data = SD,control = rpart.control(maxdepth = 3))
fancyRpartPlot(ct, main = "Scorers Classifcation Tree", sub ="Marshall John Larson", palettes = c("Reds"), type = 1)
*40 PPG players in the NHL, 5% of the NHL
*18 outliers that have not played many games, 2% of NHL
*311 players that are not significant, 35% of the NHL
*The amount of players that have a lower amount of PPG and yet high GP is disproportionally defenseman
*235 decent players in the NHL that have a good sample size and a good amount of PPG
*1/3 of the NHL is above .44 PPG
nn1 <- neuralnet(
Cap_Hit ~ P_per_game+Wt+Draft+Age+Pos+PIM,
data = NHL,
hidden=2,
linear.output = FALSE
)
plot(nn1)
nn2 <- neuralnet(
Cap_Hit ~ P_per_game+Wt+Draft+Age+Pos+PIM+GP,
data=NHL,
hidden=2,
linear.output = FALSE
)
plot(nn2)
nn3<- neuralnet(
Cap_Hit ~ P_per_game+Wt+Draft+Age+Pos+PIMPG,
data = NHL,
hidden=2,
linear.output = FALSE
)
plot(nn3)
The error in the neural networks made them unpredictable. Limiting the information that I could glean from them.