This is a continuation of this project <https://rpubs.com/nurfnick/720194
set.seed(43)
samp_mean <- function(x, i) {
mean(x[i])
}
GP <- na.omit(data$GP)
results <- boot(GP, samp_mean, 100)
plot(results)
boot.ci(results, type="perc")
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 100 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = results, type = "perc")
##
## Intervals :
## Level Percentile
## 95% ( 3.052, 3.500 )
## Calculations and Intervals on Original Scale
## Some percentile intervals may be unstable
t.test(GP, mu = 4)
##
## One Sample t-test
##
## data: GP
## t = -6.8861, df = 29, p-value = 1.448e-07
## alternative hypothesis: true mean is not equal to 4
## 95 percent confidence interval:
## 3.048862 3.484472
## sample estimates:
## mean of x
## 3.266667
START HERE>>>> \[ t = \frac{\mu-\overline x}{SE} \]
results$t0
## [1] 3.266667
SE = sd(results$t)
c(results$t0-2*SE,results$t0+2*SE)
## [1] 3.059878 3.473455
t = (3.5-results$t0)/SE
1-pt(t,99)
## [1] 0.01311133
mean(3.5<=results$t)
## [1] 0.03
data
## Rk X AvAge GP W L OL PTS PTS. GF GA SOW SOL SRS
## 1 1 Philadelphia Flyers 27.2 4 3 1 0 6 0.750 15 11 0 0 0.38
## 2 2 Toronto Maple Leafs 29.0 4 3 1 0 6 0.750 14 12 0 0 1.85
## 3 3 Vegas Golden Knights 28.8 3 3 0 0 6 1.000 11 5 0 0 1.63
## 4 4 Washington Capitals 30.2 4 2 0 2 6 0.750 15 13 0 1 -0.38
## 5 5 New Jersey Devils 26.1 3 2 0 1 5 0.833 8 6 0 1 0.13
## 6 6 Montreal Canadiens 28.6 3 2 0 1 5 0.833 12 7 0 0 0.83
## 7 7 Calgary Flames 28.1 3 2 0 1 5 0.833 11 6 0 0 0.29
## 8 8 Colorado Avalanche 27.1 3 2 1 0 4 0.667 12 6 0 0 0.63
## 9 9 St. Louis Blues 28.3 3 2 1 0 4 0.667 9 13 0 0 -0.63
## 10 10 Winnipeg Jets 28.0 3 2 1 0 4 0.667 9 9 0 0 1.27
## 11 11 Detroit Red Wings 29.1 4 2 2 0 4 0.500 9 10 0 0 -0.39
## 12 12 New York Islanders 28.8 3 2 1 0 4 0.667 5 5 0 0 -0.13
## 13 13 Carolina Hurricanes 27.0 3 2 1 0 4 0.667 9 6 0 0 1.20
## 14 14 Florida Panthers 27.8 2 2 0 0 4 1.000 10 6 0 0 -0.75
## 15 15 Nashville Predators 28.7 3 2 1 0 4 0.667 10 7 0 0 0.51
## 16 16 Tampa Bay Lightning 27.6 2 2 0 0 4 1.000 10 3 0 0 0.75
## 17 17 Minnesota Wild 29.6 3 2 1 0 4 0.667 8 7 0 0 -0.38
## 18 18 Pittsburgh Penguins 28.7 4 2 2 0 4 0.500 13 18 1 0 -0.38
## 19 19 Anaheim Ducks 28.1 3 1 1 1 3 0.500 4 7 0 0 0.88
## 20 20 Columbus Blue Jackets 26.6 4 1 2 1 3 0.375 8 13 0 0 -0.90
## 21 21 Boston Bruins 28.8 3 1 1 1 3 0.500 3 5 1 0 -0.13
## 22 22 Ottawa Senators 26.9 3 1 1 1 3 0.500 10 10 0 0 1.66
## 23 23 Arizona Coyotes 28.5 3 1 1 1 3 0.500 10 10 0 1 -0.13
## 24 24 Buffalo Sabres 27.3 4 1 3 0 2 0.250 11 12 0 0 0.38
## 25 25 Edmonton Oilers 27.7 4 1 3 0 2 0.250 10 15 0 0 -2.19
## 26 26 New York Rangers 25.6 3 1 2 0 2 0.333 8 8 0 0 0.13
## 27 27 Vancouver Canucks 27.4 4 1 3 0 2 0.250 9 16 0 0 -2.69
## 28 28 Los Angeles Kings 28.2 3 0 1 2 2 0.333 8 11 0 0 -0.13
## 29 29 San Jose Sharks 29.3 3 1 2 0 2 0.333 10 13 1 0 -1.88
## 30 30 Chicago Blackhawks 26.9 4 0 3 1 1 0.125 9 20 0 0 0.00
## 31 31 Dallas Stars NA NA NA NA NA NA NA NA NA NA NA NA
## SOS TG.G EVGF EVGA PP PPO PP. PPA PPOA PK. SH SHA PIM.G oPIM.G S
## 1 -0.63 6.50 12 7 3 12 25.00 4 15 73.33 0 0 7.5 6.0 99
## 2 1.35 6.50 8 9 6 14 42.86 3 17 82.35 0 0 13.3 9.3 135
## 3 -0.38 5.33 10 3 0 7 0.00 1 8 87.50 1 1 5.3 4.7 90
## 4 -0.63 7.00 13 9 2 8 25.00 3 16 81.25 0 1 9.3 5.3 98
## 5 -0.21 4.67 7 1 1 10 10.00 4 13 69.23 0 1 11.0 9.0 81
## 6 -0.84 6.33 6 4 4 10 40.00 2 14 85.71 2 1 11.0 8.3 102
## 7 -1.38 5.67 5 4 6 16 37.50 1 12 91.67 0 1 8.7 11.3 93
## 8 -1.38 6.00 4 5 8 17 47.06 1 9 88.89 0 0 6.7 12.0 89
## 9 0.71 7.33 9 5 0 9 0.00 8 14 42.86 0 0 10.0 6.7 86
## 10 1.27 6.00 7 5 2 13 15.38 4 9 55.56 0 0 7.3 10.0 90
## 11 -0.14 4.75 8 8 1 7 14.29 2 11 81.82 0 0 11.8 9.8 99
## 12 -0.13 3.33 3 4 2 17 11.76 1 14 92.86 0 0 14.7 12.7 73
## 13 0.20 5.00 6 5 3 12 25.00 1 9 88.89 0 0 7.7 9.7 98
## 14 -2.75 8.00 7 3 3 8 37.50 3 5 40.00 0 0 5.0 8.0 64
## 15 -0.49 5.67 9 6 1 11 9.09 1 8 87.50 0 0 5.3 7.3 101
## 16 -2.75 6.50 8 1 2 7 28.57 2 7 71.43 0 0 7.0 7.0 70
## 17 -0.71 5.00 8 6 0 16 0.00 1 11 90.91 0 0 8.7 12.0 105
## 18 0.63 7.75 8 14 4 16 25.00 4 15 73.33 1 0 7.5 8.0 124
## 19 1.88 3.67 4 7 0 5 0.00 0 7 100.00 0 0 5.3 4.0 71
## 20 0.35 5.25 8 12 0 7 0.00 1 9 88.89 0 0 9.5 8.5 127
## 21 0.21 2.67 0 5 2 10 20.00 0 13 100.00 1 0 11.7 9.7 92
## 22 1.66 6.67 7 6 3 15 20.00 4 14 71.43 0 0 10.0 14.0 84
## 23 0.21 6.67 6 6 3 12 25.00 3 13 76.92 1 1 10.0 9.3 95
## 24 0.63 5.75 8 11 3 15 20.00 1 5 80.00 0 0 3.8 8.8 134
## 25 -0.94 6.25 7 11 2 18 11.11 2 14 85.71 1 2 9.5 11.5 138
## 26 0.13 5.33 5 5 3 17 17.65 3 14 78.57 0 0 10.0 16.0 106
## 27 -0.94 6.25 8 9 0 15 0.00 7 21 66.67 1 0 13.0 10.0 134
## 28 0.88 6.33 6 9 2 12 16.67 2 17 88.24 0 0 12.0 8.7 92
## 29 -1.21 7.67 5 11 5 11 45.45 2 10 80.00 0 0 8.0 8.7 95
## 30 2.75 7.25 4 15 5 12 41.67 5 15 66.67 0 0 7.5 6.0 124
## 31 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## S. SA SV. SO
## 1 15.2 144 0.924 1
## 2 10.4 103 0.883 0
## 3 12.2 76 0.934 0
## 4 15.3 114 0.886 0
## 5 9.9 115 0.948 0
## 6 11.8 95 0.926 0
## 7 11.8 93 0.935 1
## 8 13.5 78 0.923 1
## 9 10.5 91 0.857 0
## 10 10.0 105 0.914 0
## 11 9.1 127 0.921 0
## 12 6.8 83 0.940 2
## 13 9.2 68 0.912 1
## 14 15.6 64 0.906 0
## 15 9.9 98 0.929 0
## 16 14.3 60 0.950 0
## 17 7.6 94 0.926 0
## 18 10.5 96 0.813 0
## 19 5.6 96 0.927 1
## 20 6.3 132 0.902 0
## 21 3.3 70 0.929 0
## 22 11.9 91 0.890 0
## 23 10.5 97 0.897 0
## 24 8.2 101 0.881 0
## 25 7.2 145 0.897 0
## 26 7.5 84 0.905 1
## 27 6.7 144 0.889 0
## 28 8.7 95 0.884 0
## 29 10.5 96 0.865 0
## 30 7.3 134 0.851 0
## 31 NA NA NA NA
data$GP[is.na(data$GP)] <- 0
data$GP
## [1] 4 4 3 4 3 3 3 3 3 3 4 3 3 2 3 2 3 4 3 4 3 3 3 4 4 3 4 3 3 4 0
samp_min <- function(x, i) {
min(x[i])
}
results2 <- boot(data$GP,samp_min, R = 10000)
plot(results2)
xbar = mean(results2$t)
s = sd(results2$t)
c(xbar - 2*s,xbar + 2*s)
## [1] -1.30744 2.83684
boot.ci(results2, type = "perc")
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 10000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = results2, type = "perc")
##
## Intervals :
## Level Percentile
## 95% ( 0, 3 )
## Calculations and Intervals on Original Scale
mean(0 == results2$t)
## [1] 0.6391
%%%%Start Here
library(caret)
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:boot':
##
## melanoma
## Loading required package: ggplot2
data$W[is.na(data$W)] <- 0
data$PTS[is.na(data$PTS)] <- 0
trainingSamples <- createDataPartition(data$Rk ,p=.66,list = FALSE)
trainingSamples
## Resample1
## [1,] 1
## [2,] 3
## [3,] 5
## [4,] 6
## [5,] 7
## [6,] 8
## [7,] 9
## [8,] 10
## [9,] 13
## [10,] 14
## [11,] 15
## [12,] 16
## [13,] 17
## [14,] 19
## [15,] 20
## [16,] 22
## [17,] 23
## [18,] 24
## [19,] 26
## [20,] 27
## [21,] 28
## [22,] 29
## [23,] 30
trainData <- data[trainingSamples,]
testData <- data[-trainingSamples,]
testData
## Rk X AvAge GP W L OL PTS PTS. GF GA SOW SOL SRS SOS
## 2 2 Toronto Maple Leafs 29.0 4 3 1 0 6 0.750 14 12 0 0 1.85 1.35
## 4 4 Washington Capitals 30.2 4 2 0 2 6 0.750 15 13 0 1 -0.38 -0.63
## 11 11 Detroit Red Wings 29.1 4 2 2 0 4 0.500 9 10 0 0 -0.39 -0.14
## 12 12 New York Islanders 28.8 3 2 1 0 4 0.667 5 5 0 0 -0.13 -0.13
## 18 18 Pittsburgh Penguins 28.7 4 2 2 0 4 0.500 13 18 1 0 -0.38 0.63
## 21 21 Boston Bruins 28.8 3 1 1 1 3 0.500 3 5 1 0 -0.13 0.21
## 25 25 Edmonton Oilers 27.7 4 1 3 0 2 0.250 10 15 0 0 -2.19 -0.94
## 31 31 Dallas Stars NA 0 0 NA NA 0 NA NA NA NA NA NA NA
## TG.G EVGF EVGA PP PPO PP. PPA PPOA PK. SH SHA PIM.G oPIM.G S S. SA
## 2 6.50 8 9 6 14 42.86 3 17 82.35 0 0 13.3 9.3 135 10.4 103
## 4 7.00 13 9 2 8 25.00 3 16 81.25 0 1 9.3 5.3 98 15.3 114
## 11 4.75 8 8 1 7 14.29 2 11 81.82 0 0 11.8 9.8 99 9.1 127
## 12 3.33 3 4 2 17 11.76 1 14 92.86 0 0 14.7 12.7 73 6.8 83
## 18 7.75 8 14 4 16 25.00 4 15 73.33 1 0 7.5 8.0 124 10.5 96
## 21 2.67 0 5 2 10 20.00 0 13 100.00 1 0 11.7 9.7 92 3.3 70
## 25 6.25 7 11 2 18 11.11 2 14 85.71 1 2 9.5 11.5 138 7.2 145
## 31 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## SV. SO
## 2 0.883 0
## 4 0.886 0
## 11 0.921 0
## 12 0.940 2
## 18 0.813 0
## 21 0.929 0
## 25 0.897 0
## 31 NA NA
model <- lm(W ~ PTS , data = trainData)
summary(model)
##
## Call:
## lm(formula = W ~ PTS, data = trainData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7105 -0.2566 0.1974 0.1974 0.2895
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.38158 0.17806 -2.143 0.044 *
## PTS 0.54605 0.04687 11.651 1.25e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2951 on 21 degrees of freedom
## Multiple R-squared: 0.866, Adjusted R-squared: 0.8597
## F-statistic: 135.8 on 1 and 21 DF, p-value: 1.252e-10
data[trainingSamples,'Test_Train'] <- "Train"
data[-trainingSamples,'Test_Train'] <- "Test"
data$Test_Train
## [1] "Train" "Test" "Train" "Test" "Train" "Train" "Train" "Train" "Train"
## [10] "Train" "Test" "Test" "Train" "Train" "Train" "Train" "Train" "Test"
## [19] "Train" "Train" "Test" "Train" "Train" "Train" "Test" "Train" "Train"
## [28] "Train" "Train" "Train" "Test"
col.rainbow <- rainbow(2)
palette(col.rainbow)
plot(data$PTS,data$W,pch =19,col = as.factor(data$Test_Train))
abline(model)
library(ggplot2)
ggplot(data = data, mapping = aes(PTS,W,color = Test_Train))+
geom_jitter() +
geom_smooth(method = lm)
## `geom_smooth()` using formula 'y ~ x'
pre <- predict(model,data[-trainingSamples,])
pre
## 2 4 11 12 18 21 25
## 2.8947368 2.8947368 1.8026316 1.8026316 1.8026316 1.2565789 0.7105263
## 31
## -0.3815789
R2(pre,data[-trainingSamples,"W"] )
## [1] 0.861082
%%%Start Here
train.control <- trainControl(method = "cv", number = 10)
model <- train(W ~ PTS, data = data,
method = "lm",
trControl = train.control)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
## There were missing values in resampled performance measures.
# Summarize the results
print(model)
## Linear Regression
##
## 31 samples
## 1 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 28, 29, 28, 28, 27, 27, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.3002804 0.9405655 0.2767682
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
summary(model)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7956 -0.2889 0.2044 0.2111 0.2178
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.19771 0.14867 -1.33 0.194
## PTS 0.49666 0.03839 12.94 1.42e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3155 on 29 degrees of freedom
## Multiple R-squared: 0.8523, Adjusted R-squared: 0.8473
## F-statistic: 167.4 on 1 and 29 DF, p-value: 1.423e-13