# Fix the RNG state so the random steps in this script are repeatable.
set.seed(123456)

# Load the SkillCraft data; the first row of the CSV holds the column names.
data <- read.csv(file = "SkillCraftDataset.csv", header = TRUE)

# Remove the GameID, Age, HoursPerWeek and TotalHours columns.
data[c("GameID", "Age", "HoursPerWeek", "TotalHours")] <- NULL

# Print the type and leading values of every remaining column.
str(data)
## 'data.frame': 3395 obs. of 16 variables:
## $ LeagueIndex : int 5 5 4 3 3 2 1 7 4 4 ...
## $ APM : num 144 129 70 108 123 ...
## $ SelectByHotkeys : num 0.00352 0.0033 0.0011 0.00103 0.00114 ...
## $ AssignToHotkeys : num 0.00022 0.000259 0.000336 0.000213 0.000327 ...
## $ UniqueHotkeys : int 7 4 4 1 2 2 6 6 2 8 ...
## $ MinimapAttacks : num 1.10e-04 2.94e-04 2.94e-04 5.33e-05 0.00 ...
## $ MinimapRightClicks : num 0.000392 0.000432 0.000461 0.000543 0.001329 ...
## $ NumberOfPACs : num 0.00485 0.00431 0.00293 0.00378 0.00237 ...
## $ GapBetweenPACs : num 32.7 32.9 44.6 29.2 22.7 ...
## $ ActionLatency : num 40.9 42.3 75.4 53.7 62.1 ...
## $ ActionsInPAC : num 4.75 4.84 4.04 4.92 9.37 ...
## $ TotalMapExplored : int 28 22 22 19 15 16 15 45 29 27 ...
## $ WorkersMade : num 0.001397 0.001193 0.000745 0.000426 0.001174 ...
## $ UniqueUnitsMade : int 6 5 6 7 4 6 5 9 7 6 ...
## $ ComplexUnitsMade : num 0 0 0 0 0 ...
## $ ComplexAbilitiesUsed: num 0.00 2.08e-04 1.89e-04 3.84e-04 1.93e-05 ...
# Keep the response (APM) and the seven predictors used by the quantile
# regression of APM on the other columns of mydat.
mydat <- data[c(
  "APM", "SelectByHotkeys", "MinimapRightClicks", "NumberOfPACs",
  "GapBetweenPACs", "ActionLatency", "ActionsInPAC", "WorkersMade"
)]

# Min / quartiles / mean / max of each retained column.
summary(mydat)
## APM SelectByHotkeys MinimapRightClicks
## Min. : 22.06 Min. :0.000000 Min. :0.0000000
## 1st Qu.: 79.90 1st Qu.:0.001258 1st Qu.:0.0001401
## Median :108.01 Median :0.002500 Median :0.0002815
## Mean :117.05 Mean :0.004299 Mean :0.0003874
## 3rd Qu.:142.79 3rd Qu.:0.005133 3rd Qu.:0.0005141
## Max. :389.83 Max. :0.043088 Max. :0.0040408
## NumberOfPACs GapBetweenPACs ActionLatency ActionsInPAC
## Min. :0.000679 Min. : 6.667 Min. : 24.09 Min. : 2.039
## 1st Qu.:0.002754 1st Qu.: 28.958 1st Qu.: 50.45 1st Qu.: 4.273
## Median :0.003395 Median : 36.724 Median : 60.93 Median : 5.096
## Mean :0.003463 Mean : 40.362 Mean : 63.74 Mean : 5.273
## 3rd Qu.:0.004027 3rd Qu.: 48.291 3rd Qu.: 73.68 3rd Qu.: 6.034
## Max. :0.007971 Max. :237.143 Max. :176.37 Max. :18.558
## WorkersMade
## Min. :0.0000770
## 1st Qu.:0.0006830
## Median :0.0009052
## Mean :0.0010317
## 3rd Qu.:0.0012587
## Max. :0.0051493
# Report the (rows, columns) dimensions of `data` after the column removal.
dim(data)
## [1] 3395 16
library(quantreg)
## Warning: package 'quantreg' was built under R version 3.5.3
## Loading required package: SparseM
## Warning: package 'SparseM' was built under R version 3.5.2
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
# Linear quantile regression of APM on every other column of `mydat`,
# targeting the 0.75 conditional quantile.
fit <- rq(
  formula = APM ~ .,
  tau = 0.75,
  data = mydat
)

# Coefficient table: estimate, standard error, t value and p-value.
summary(fit)
## Warning in summary.rq(fit): 6 non-positive fis
##
## Call: rq(formula = APM ~ ., tau = 0.75, data = mydat)
##
## tau: [1] 0.75
##
## Coefficients:
## Value Std. Error t value Pr(>|t|)
## (Intercept) -95.21231 1.73972 -54.72863 0.00000
## SelectByHotkeys 5522.04132 42.61041 129.59370 0.00000
## MinimapRightClicks 1313.28280 380.50402 3.45143 0.00056
## NumberOfPACs 28878.41353 236.19330 122.26602 0.00000
## GapBetweenPACs 0.03350 0.00803 4.17046 0.00003
## ActionLatency 0.06178 0.01350 4.57524 0.00000
## ActionsInPAC 16.33639 0.12031 135.78602 0.00000
## WorkersMade 792.57378 220.31015 3.59754 0.00033
# Bootstrap bookkeeping.
n <- nrow(mydat) # number of observations to resample
B <- 1e3 # number of bootstrap replicates

# One row per replicate: how many times each observation was drawn.
Nindex <- matrix(0, nrow = B, ncol = n)

# One row per replicate: the refitted coefficient vector.
BootS <- matrix(NA, nrow = B, ncol = length(fit$coefficients))

# Coefficient table of the full-sample fit; column 1 holds the estimates.
S0 <- summary(fit)$coefficients
## Warning in summary.rq(fit): 6 non-positive fis
# Case-resampling bootstrap of the 0.75 quantile regression: each replicate
# draws n row indices with replacement and refits the model on those rows.
for (b in seq_len(B)) {
  # Row indices making up this bootstrap sample.
  J <- sample(n, n, replace = TRUE)
  # Count how often each original observation was drawn (0 when not drawn).
  # tabulate() returns all n counts directly, replacing the
  # sort(unique(J)) / table(J) pairing.
  Nindex[b, ] <- tabulate(J, nbins = n)
  f <- rq(APM ~ ., data = mydat[J, ], tau = 0.75)
  # Store the replicate's coefficient vector as row b.
  BootS[b, ] <- f$coefficients
}
# Summarise the bootstrap coefficient draws. One row per model coefficient;
# columns are the bootstrap mean, the bias relative to the full-sample
# estimate, the bootstrap standard deviation, and the 2.5% / 97.5%
# percentile limits.
# The row count follows the fitted model instead of a hard-coded 8.
BootsResult <- matrix(NA, length(fit$coefficients), 5)
rownames(BootsResult) <- c("intercept", names(fit$coefficients)[-1])
colnames(BootsResult) <- c("mean", "bias", "std", "2.5%", "97.5%")
BootsResult[, "mean"] <- colMeans(BootS)
# S0[, 1] holds the coefficient estimates from the full-sample fit.
BootsResult[, "bias"] <- BootsResult[, "mean"] - S0[, 1]
BootsResult[, "std"] <- apply(BootS, 2, sd)
# `probs` is written out in full rather than relying on partial matching of
# `prob`; apply() returns a 2 x p matrix, hence the transpose.
BootsResult[, c("2.5%", "97.5%")] <-
  t(apply(BootS, 2, quantile, probs = c(0.025, 0.975)))
BootsResult
## mean bias std 2.5%
## intercept -9.538239e+01 -1.700817e-01 2.98576048 -1.006886e+02
## SelectByHotkeys 5.518170e+03 -3.871438e+00 43.20918845 5.432552e+03
## MinimapRightClicks 1.338880e+03 2.559756e+01 463.32016520 4.546420e+02
## NumberOfPACs 2.890649e+04 2.807302e+01 375.38660638 2.819694e+04
## GapBetweenPACs 3.753668e-02 4.036357e-03 0.01034546 1.890149e-02
## ActionLatency 5.894630e-02 -2.833624e-03 0.02101409 1.221246e-02
## ActionsInPAC 1.637788e+01 4.149106e-02 0.21401288 1.598342e+01
## WorkersMade 6.574170e+02 -1.351568e+02 236.90726554 1.846755e+02
## 97.5%
## intercept -8.890634e+01
## SelectByHotkeys 5.590324e+03
## MinimapRightClicks 2.250195e+03
## NumberOfPACs 2.964103e+04
## GapBetweenPACs 5.984609e-02
## ActionLatency 9.471879e-02
## ActionsInPAC 1.682811e+01
## WorkersMade 1.103795e+03
# One histogram of the bootstrap draws per coefficient. The solid red line
# marks the full-sample estimate; the dashed blue lines mark the 2.5% and
# 97.5% bootstrap percentiles (columns 4 and 5 of BootsResult).
# A single loop over all rows replaces the separate intercept plot and the
# hard-coded 2:8 range.
for (i in seq_len(nrow(BootsResult))) {
  hist(
    BootS[, i],
    main = row.names(BootsResult)[i],
    # The first row is the intercept and keeps its own axis label.
    xlab = if (i == 1) "Intercept" else "Coefficient",
    ylab = "Freq",
    cex.axis = 1.5,
    cex.lab = 1.5
  )
  abline(v = S0[i, 1], col = "red")
  # A length-2 `v` draws both percentile lines in one call.
  abline(v = BootsResult[i, 4:5], col = "blue", lty = "dashed")
  legend(
    "topleft",
    c("fitted model", "bootstrap"),
    lty = c(1, 2),
    col = c("red", "blue")
  )
}
Our model estimates agree with the bootstrap results: every fitted coefficient lies within its 95% bootstrap percentile interval, as shown in the histograms.
# Share of observations whose APM exceeds the fitted 0.75 quantile, using
# the estimate in (a)
mean(fitted(fit) < mydat$APM)
## [1] 0.2506627
# Same share, with fitted values rebuilt from the bootstrap-mean
# coefficients (column 1 of BootsResult) and the model matrix kept in fit$x
y <- fit$x %*% BootsResult[, 1]
mean(y < mydat$APM)
## [1] 0.2497791
# Exclude league levels 7 and 8, then treat the remaining levels as classes.
drop_row <- data$LeagueIndex == 7 | data$LeagueIndex == 8
mydat2 <- data[!drop_row, ]
mydat2$LeagueIndex <- as.factor(mydat2$LeagueIndex)
library(glmnet); #Ridge and Lasso
## Warning: package 'glmnet' was built under R version 3.5.3
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-18
library(grpreg); #Group lasso
## Warning: package 'grpreg' was built under R version 3.5.3
library(nnet)
# Multinomial logistic regression of LeagueIndex on all other columns.
# With the default cap of 100 iterations the optimiser stopped before
# converging (the trace recorded below, produced with that default, ends in
# "stopped after 100 iterations"), so a larger `maxit` is passed through to
# nnet().
fit2 <- multinom(LeagueIndex ~ ., data = mydat2, maxit = 1000)
# nnet sets `convergence` to 1 when the iteration cap is reached.
if (fit2$convergence != 0) {
  warning("multinom() reached maxit without converging", call. = FALSE)
}
## # weights: 102 (80 variable)
## initial value 5921.765046
## iter 10 value 4874.909834
## iter 20 value 4838.144130
## iter 30 value 4788.743204
## iter 40 value 4494.413757
## iter 50 value 4422.392639
## iter 60 value 4410.737496
## iter 70 value 4406.917073
## iter 80 value 4402.398536
## iter 90 value 4393.171837
## iter 100 value 4379.053578
## final value 4379.053578
## stopped after 100 iterations
# Reset the seed so the cross-validation below is repeatable.
set.seed(123456)
# Cross-validated multinomial lasso (alpha = 1) with standardized predictors.
# Column 1 of mydat2 is the response; the remaining columns are predictors.
lasso_x <- as.matrix(mydat2[, -1])
lasso_y <- as.matrix(mydat2[, 1])
cv.lasso <- cv.glmnet(
  lasso_x,
  lasso_y,
  family = "multinomial",
  alpha = 1,
  standardize = TRUE
)
plot(cv.lasso)
# Penalty value with the smallest mean cross-validated error
cv.lasso$lambda.min
## [1] 0.0003174583
# Mean cross-validated error at lambda.min
cv.lasso$cvm[cv.lasso$lambda.min == cv.lasso$lambda]
## [1] 2.647523
# lambda.1se: largest penalty whose CV error is within one standard error
# of the minimum
cv.lasso$lambda.1se
## [1] 0.007506268
# Mean cross-validated error at lambda.1se
cv.lasso$cvm[cv.lasso$lambda.1se == cv.lasso$lambda]
## [1] 2.671452
# Per-class coefficients at lambda.1se
coef(cv.lasso, s = cv.lasso$lambda.1se)
## $`1`
## 16 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -3.618735e+00
## APM -7.454173e-03
## SelectByHotkeys .
## AssignToHotkeys -1.208005e+03
## UniqueHotkeys .
## MinimapAttacks .
## MinimapRightClicks .
## NumberOfPACs .
## GapBetweenPACs 1.788545e-02
## ActionLatency 4.010024e-02
## ActionsInPAC .
## TotalMapExplored .
## WorkersMade -6.728349e+02
## UniqueUnitsMade .
## ComplexUnitsMade .
## ComplexAbilitiesUsed .
##
## $`2`
## 16 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 6.509636e-01
## APM -8.736527e-03
## SelectByHotkeys .
## AssignToHotkeys -1.432854e+03
## UniqueHotkeys .
## MinimapAttacks .
## MinimapRightClicks .
## NumberOfPACs -3.448783e+02
## GapBetweenPACs 1.279982e-02
## ActionLatency 9.137930e-03
## ActionsInPAC .
## TotalMapExplored .
## WorkersMade -7.811623e+01
## UniqueUnitsMade .
## ComplexUnitsMade -4.737193e+02
## ComplexAbilitiesUsed .
##
## $`3`
## 16 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -9.320352e-01
## APM .
## SelectByHotkeys -2.664905e+01
## AssignToHotkeys -2.302732e+02
## UniqueHotkeys .
## MinimapAttacks -2.052464e+02
## MinimapRightClicks .
## NumberOfPACs .
## GapBetweenPACs 6.910462e-06
## ActionLatency 1.714435e-02
## ActionsInPAC .
## TotalMapExplored .
## WorkersMade .
## UniqueUnitsMade .
## ComplexUnitsMade .
## ComplexAbilitiesUsed .
##
## $`4`
## 16 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 9.655769e-01
## APM .
## SelectByHotkeys .
## AssignToHotkeys 2.302732e+02
## UniqueHotkeys .
## MinimapAttacks .
## MinimapRightClicks .
## NumberOfPACs .
## GapBetweenPACs -6.910462e-06
## ActionLatency -9.137930e-03
## ActionsInPAC .
## TotalMapExplored 1.969862e-03
## WorkersMade .
## UniqueUnitsMade .
## ComplexUnitsMade 4.568420e+02
## ComplexAbilitiesUsed .
##
## $`5`
## 16 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 1.600930e+00
## APM 2.995355e-03
## SelectByHotkeys 3.349526e+01
## AssignToHotkeys 6.516451e+02
## UniqueHotkeys 3.371008e-02
## MinimapAttacks 1.405990e+03
## MinimapRightClicks .
## NumberOfPACs 8.145506e+01
## GapBetweenPACs -1.229095e-02
## ActionLatency -3.560177e-02
## ActionsInPAC .
## TotalMapExplored .
## WorkersMade 2.802311e+02
## UniqueUnitsMade .
## ComplexUnitsMade 6.644872e+00
## ComplexAbilitiesUsed 1.165408e+02
##
## $`6`
## 16 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 1.333300e+00
## APM 1.809775e-03
## SelectByHotkeys 8.263233e+01
## AssignToHotkeys 1.465653e+03
## UniqueHotkeys 9.504474e-02
## MinimapAttacks 2.150834e+03
## MinimapRightClicks .
## NumberOfPACs 3.184404e+02
## GapBetweenPACs -2.486925e-02
## ActionLatency -6.083014e-02
## ActionsInPAC .
## TotalMapExplored .
## WorkersMade 2.351729e+02
## UniqueUnitsMade .
## ComplexUnitsMade .
## ComplexAbilitiesUsed .
Groups 1, 3, and 4 each have 5 selected variables, group 2 has 7, group 5 has 11, and group 6 has 9. Thus groups 1, 3, and 4 have the fewest selected variables and group 5 has the most.
Most useful: AssignToHotkeys, GapBetweenPACs, and ActionLatency; these three variables are selected in every group.
Least useful: ActionsInPAC, MinimapRightClicks, and UniqueUnitsMade; these three variables are not selected in any group.