# Fix the RNG seed so the resampling further down is reproducible.
set.seed(123456)
# Load the SkillCraft data set; the first row of the CSV holds the column names.
data <- read.csv("SkillCraftDataset.csv", header = TRUE)
# Drop the four columns that are not used in this analysis.
data <- subset(data, select = -c(GameID, Age, HoursPerWeek, TotalHours))
# Inspect the remaining columns and their types.
str(data)
## 'data.frame':    3395 obs. of  16 variables:
##  $ LeagueIndex         : int  5 5 4 3 3 2 1 7 4 4 ...
##  $ APM                 : num  144 129 70 108 123 ...
##  $ SelectByHotkeys     : num  0.00352 0.0033 0.0011 0.00103 0.00114 ...
##  $ AssignToHotkeys     : num  0.00022 0.000259 0.000336 0.000213 0.000327 ...
##  $ UniqueHotkeys       : int  7 4 4 1 2 2 6 6 2 8 ...
##  $ MinimapAttacks      : num  1.10e-04 2.94e-04 2.94e-04 5.33e-05 0.00 ...
##  $ MinimapRightClicks  : num  0.000392 0.000432 0.000461 0.000543 0.001329 ...
##  $ NumberOfPACs        : num  0.00485 0.00431 0.00293 0.00378 0.00237 ...
##  $ GapBetweenPACs      : num  32.7 32.9 44.6 29.2 22.7 ...
##  $ ActionLatency       : num  40.9 42.3 75.4 53.7 62.1 ...
##  $ ActionsInPAC        : num  4.75 4.84 4.04 4.92 9.37 ...
##  $ TotalMapExplored    : int  28 22 22 19 15 16 15 45 29 27 ...
##  $ WorkersMade         : num  0.001397 0.001193 0.000745 0.000426 0.001174 ...
##  $ UniqueUnitsMade     : int  6 5 6 7 4 6 5 9 7 6 ...
##  $ ComplexUnitsMade    : num  0 0 0 0 0 ...
##  $ ComplexAbilitiesUsed: num  0.00 2.08e-04 1.89e-04 3.84e-04 1.93e-05 ...
# Response (APM) followed by the seven predictors used in the quantile regression.
apm_model_cols <- c(
  "APM", "SelectByHotkeys", "MinimapRightClicks", "NumberOfPACs",
  "GapBetweenPACs", "ActionLatency", "ActionsInPAC", "WorkersMade"
)
mydat <- data[, apm_model_cols]
# Five-number summary plus mean for each selected column.
summary(mydat)
##       APM         SelectByHotkeys    MinimapRightClicks 
##  Min.   : 22.06   Min.   :0.000000   Min.   :0.0000000  
##  1st Qu.: 79.90   1st Qu.:0.001258   1st Qu.:0.0001401  
##  Median :108.01   Median :0.002500   Median :0.0002815  
##  Mean   :117.05   Mean   :0.004299   Mean   :0.0003874  
##  3rd Qu.:142.79   3rd Qu.:0.005133   3rd Qu.:0.0005141  
##  Max.   :389.83   Max.   :0.043088   Max.   :0.0040408  
##   NumberOfPACs      GapBetweenPACs    ActionLatency     ActionsInPAC   
##  Min.   :0.000679   Min.   :  6.667   Min.   : 24.09   Min.   : 2.039  
##  1st Qu.:0.002754   1st Qu.: 28.958   1st Qu.: 50.45   1st Qu.: 4.273  
##  Median :0.003395   Median : 36.724   Median : 60.93   Median : 5.096  
##  Mean   :0.003463   Mean   : 40.362   Mean   : 63.74   Mean   : 5.273  
##  3rd Qu.:0.004027   3rd Qu.: 48.291   3rd Qu.: 73.68   3rd Qu.: 6.034  
##  Max.   :0.007971   Max.   :237.143   Max.   :176.37   Max.   :18.558  
##   WorkersMade       
##  Min.   :0.0000770  
##  1st Qu.:0.0006830  
##  Median :0.0009052  
##  Mean   :0.0010317  
##  3rd Qu.:0.0012587  
##  Max.   :0.0051493
# Number of rows and columns left in `data` after dropping the unused columns.
dim(data)
## [1] 3395   16

1 (a)

library(quantreg)
## Warning: package 'quantreg' was built under R version 3.5.3
## Loading required package: SparseM
## Warning: package 'SparseM' was built under R version 3.5.2
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
# Linear quantile regression of APM on every other column of mydat,
# fitted at the 0.75 quantile.
fit <- rq(
  APM ~ .,
  tau = 0.75,
  data = mydat
)
# Coefficient table with standard errors, t values and p-values.
summary(fit)
## Warning in summary.rq(fit): 6 non-positive fis
## 
## Call: rq(formula = APM ~ ., tau = 0.75, data = mydat)
## 
## tau: [1] 0.75
## 
## Coefficients:
##                    Value       Std. Error  t value     Pr(>|t|)   
## (Intercept)          -95.21231     1.73972   -54.72863     0.00000
## SelectByHotkeys     5522.04132    42.61041   129.59370     0.00000
## MinimapRightClicks  1313.28280   380.50402     3.45143     0.00056
## NumberOfPACs       28878.41353   236.19330   122.26602     0.00000
## GapBetweenPACs         0.03350     0.00803     4.17046     0.00003
## ActionLatency          0.06178     0.01350     4.57524     0.00000
## ActionsInPAC          16.33639     0.12031   135.78602     0.00000
## WorkersMade          792.57378   220.31015     3.59754     0.00033

1 (b) (i) 95% confidence interval

# Case-resampling bootstrap of the tau = 0.75 fit: rows of mydat are redrawn
# with replacement and the same model is refitted on every resample.
n <- nrow(mydat)
B <- 1e3  # number of bootstrap replicates
# Nindex[b, i] counts how many times row i was drawn in replicate b.
Nindex <- matrix(0, B, n)
# BootS[b, ] holds the coefficient vector estimated on replicate b.
BootS <- matrix(NA_real_, B, length(fit$coefficients))
# Coefficient table of the original fit; column 1 is the point estimate.
S0 <- summary(fit)$coefficients
## Warning in summary.rq(fit): 6 non-positive fis
for (b in seq_len(B)) {
  # Row indices of this resample (same size as the original data).
  J <- sample(n, n, replace = TRUE)
  # Per-row draw counts; rows that were not drawn get 0.
  Nindex[b, ] <- tabulate(J, nbins = n)
  f <- rq(APM ~ ., data = mydat[J, ], tau = 0.75)
  BootS[b, ] <- f$coefficients
}


# Summarise the bootstrap distribution of every coefficient: mean, bias relative
# to the original estimate, standard deviation and the 2.5% / 97.5% percentiles.
n_coef <- ncol(BootS)          # one row per coefficient, taken from BootS itself
boot_mean <- colMeans(BootS)   # computed once and reused for mean and bias

BootsResult <- matrix(NA_real_, n_coef, 5)
rownames(BootsResult) <- c("intercept", names(fit$coefficients)[-1])
colnames(BootsResult) <- c("mean", "bias", "std", "2.5%", "97.5%")

BootsResult[, 1] <- boot_mean
# Bias = bootstrap mean minus the point estimate from the original fit.
BootsResult[, 2] <- boot_mean - S0[, 1]
BootsResult[, 3] <- apply(BootS, 2, sd)
# Percentile interval bounds; `probs` is spelled out rather than partially matched.
BootsResult[, 4] <- apply(BootS, 2, quantile, probs = 0.025)
BootsResult[, 5] <- apply(BootS, 2, quantile, probs = 0.975)
BootsResult
##                             mean          bias          std          2.5%
## intercept          -9.538239e+01 -1.700817e-01   2.98576048 -1.006886e+02
## SelectByHotkeys     5.518170e+03 -3.871438e+00  43.20918845  5.432552e+03
## MinimapRightClicks  1.338880e+03  2.559756e+01 463.32016520  4.546420e+02
## NumberOfPACs        2.890649e+04  2.807302e+01 375.38660638  2.819694e+04
## GapBetweenPACs      3.753668e-02  4.036357e-03   0.01034546  1.890149e-02
## ActionLatency       5.894630e-02 -2.833624e-03   0.02101409  1.221246e-02
## ActionsInPAC        1.637788e+01  4.149106e-02   0.21401288  1.598342e+01
## WorkersMade         6.574170e+02 -1.351568e+02 236.90726554  1.846755e+02
##                            97.5%
## intercept          -8.890634e+01
## SelectByHotkeys     5.590324e+03
## MinimapRightClicks  2.250195e+03
## NumberOfPACs        2.964103e+04
## GapBetweenPACs      5.984609e-02
## ActionLatency       9.471879e-02
## ActionsInPAC        1.682811e+01
## WorkersMade         1.103795e+03

1(b)(ii)

# One histogram per coefficient of its bootstrap distribution. The solid red
# line marks the estimate from the original fit; the dashed blue lines mark the
# 2.5% and 97.5% bootstrap percentiles stored in BootsResult.
for (i in seq_len(ncol(BootS))) {
  hist(
    BootS[, i],
    main = row.names(BootsResult)[i],
    # The first coefficient is the intercept and keeps its own axis label.
    xlab = if (i == 1) "Intercept" else "Coefficient",
    ylab = "Freq",
    cex.axis = 1.5,
    cex.lab = 1.5
  )
  abline(v = S0[i, 1], col = "red")
  # Columns 4 and 5 hold the lower and upper percentile bounds.
  abline(v = BootsResult[i, 4:5], col = "blue", lty = "dashed")
  legend(
    "topleft",
    c("fitted model", "bootstrap"),
    lty = c(1, 2),
    col = c("red", "blue")
  )
}

The model estimates agree with the bootstrap results: every fitted coefficient lies within its 95% bootstrap percentile interval, as shown in the histograms.

1(c)

# Share of observations whose APM lies above the fitted values from (a).
above_fit <- mydat$APM > fitted(fit)
mean(above_fit)
## [1] 0.2506627
# Same share, with predictions built from the bootstrap-mean coefficients
# (column 1 of BootsResult) applied to the design matrix stored in the fit.
boot_coef_mean <- BootsResult[, 1]
y <- fit$x %*% boot_coef_mean
mean(mydat$APM > y)
## [1] 0.2497791

2 (a)

# Exclude league levels 7 and 8, then treat the league index as a categorical response.
is_league_7_or_8 <- data$LeagueIndex == 7 | data$LeagueIndex == 8
mydat2 <- data[!is_league_7_or_8, ]
mydat2$LeagueIndex <- as.factor(mydat2$LeagueIndex)

library(glmnet); #Ridge and Lasso  
## Warning: package 'glmnet' was built under R version 3.5.3
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-18
library(grpreg); #Group lasso
## Warning: package 'grpreg' was built under R version 3.5.3
library(nnet)
# Multinomial model of LeagueIndex on all other columns of mydat2, fitted with
# nnet's multinom().
# NOTE(review): the optimizer trace recorded below ends with
# "stopped after 100 iterations", i.e. the fit hit its iteration cap instead of
# reporting convergence. Consider refitting with a larger `maxit` before relying
# on fit2.
fit2 = multinom(LeagueIndex~.,data = mydat2)
## # weights:  102 (80 variable)
## initial  value 5921.765046 
## iter  10 value 4874.909834
## iter  20 value 4838.144130
## iter  30 value 4788.743204
## iter  40 value 4494.413757
## iter  50 value 4422.392639
## iter  60 value 4410.737496
## iter  70 value 4406.917073
## iter  80 value 4402.398536
## iter  90 value 4393.171837
## iter 100 value 4379.053578
## final  value 4379.053578 
## stopped after 100 iterations
# Re-seed before cross-validation so the run is reproducible.
set.seed(123456)
# Predictors: every column except the first (LeagueIndex). Response: the first column.
lasso_x <- as.matrix(mydat2[, -1])
lasso_y <- as.matrix(mydat2[, 1])
# Cross-validated multinomial lasso (alpha = 1) with standardized predictors.
cv.lasso <- cv.glmnet(
  lasso_x,
  lasso_y,
  family = "multinomial",
  alpha = 1,
  standardize = TRUE
)
# Cross-validation error curve over the lambda path.
plot(cv.lasso)

# Lambda with the smallest cross-validated error
cv.lasso$lambda.min
## [1] 0.0003174583
# Cross-validated error at lambda.min
cv.lasso$cvm[match(cv.lasso$lambda.min, cv.lasso$lambda)]
## [1] 2.647523
# lambda.1se: largest lambda whose CV error is within one standard error of the minimum
cv.lasso$lambda.1se
## [1] 0.007506268
# Cross-validated error at lambda.1se
cv.lasso$cvm[match(cv.lasso$lambda.1se, cv.lasso$lambda)]
## [1] 2.671452
# Per-class coefficients at lambda.1se
coef(cv.lasso, s = "lambda.1se")
## $`1`
## 16 x 1 sparse Matrix of class "dgCMatrix"
##                                  1
## (Intercept)          -3.618735e+00
## APM                  -7.454173e-03
## SelectByHotkeys       .           
## AssignToHotkeys      -1.208005e+03
## UniqueHotkeys         .           
## MinimapAttacks        .           
## MinimapRightClicks    .           
## NumberOfPACs          .           
## GapBetweenPACs        1.788545e-02
## ActionLatency         4.010024e-02
## ActionsInPAC          .           
## TotalMapExplored      .           
## WorkersMade          -6.728349e+02
## UniqueUnitsMade       .           
## ComplexUnitsMade      .           
## ComplexAbilitiesUsed  .           
## 
## $`2`
## 16 x 1 sparse Matrix of class "dgCMatrix"
##                                  1
## (Intercept)           6.509636e-01
## APM                  -8.736527e-03
## SelectByHotkeys       .           
## AssignToHotkeys      -1.432854e+03
## UniqueHotkeys         .           
## MinimapAttacks        .           
## MinimapRightClicks    .           
## NumberOfPACs         -3.448783e+02
## GapBetweenPACs        1.279982e-02
## ActionLatency         9.137930e-03
## ActionsInPAC          .           
## TotalMapExplored      .           
## WorkersMade          -7.811623e+01
## UniqueUnitsMade       .           
## ComplexUnitsMade     -4.737193e+02
## ComplexAbilitiesUsed  .           
## 
## $`3`
## 16 x 1 sparse Matrix of class "dgCMatrix"
##                                  1
## (Intercept)          -9.320352e-01
## APM                   .           
## SelectByHotkeys      -2.664905e+01
## AssignToHotkeys      -2.302732e+02
## UniqueHotkeys         .           
## MinimapAttacks       -2.052464e+02
## MinimapRightClicks    .           
## NumberOfPACs          .           
## GapBetweenPACs        6.910462e-06
## ActionLatency         1.714435e-02
## ActionsInPAC          .           
## TotalMapExplored      .           
## WorkersMade           .           
## UniqueUnitsMade       .           
## ComplexUnitsMade      .           
## ComplexAbilitiesUsed  .           
## 
## $`4`
## 16 x 1 sparse Matrix of class "dgCMatrix"
##                                  1
## (Intercept)           9.655769e-01
## APM                   .           
## SelectByHotkeys       .           
## AssignToHotkeys       2.302732e+02
## UniqueHotkeys         .           
## MinimapAttacks        .           
## MinimapRightClicks    .           
## NumberOfPACs          .           
## GapBetweenPACs       -6.910462e-06
## ActionLatency        -9.137930e-03
## ActionsInPAC          .           
## TotalMapExplored      1.969862e-03
## WorkersMade           .           
## UniqueUnitsMade       .           
## ComplexUnitsMade      4.568420e+02
## ComplexAbilitiesUsed  .           
## 
## $`5`
## 16 x 1 sparse Matrix of class "dgCMatrix"
##                                  1
## (Intercept)           1.600930e+00
## APM                   2.995355e-03
## SelectByHotkeys       3.349526e+01
## AssignToHotkeys       6.516451e+02
## UniqueHotkeys         3.371008e-02
## MinimapAttacks        1.405990e+03
## MinimapRightClicks    .           
## NumberOfPACs          8.145506e+01
## GapBetweenPACs       -1.229095e-02
## ActionLatency        -3.560177e-02
## ActionsInPAC          .           
## TotalMapExplored      .           
## WorkersMade           2.802311e+02
## UniqueUnitsMade       .           
## ComplexUnitsMade      6.644872e+00
## ComplexAbilitiesUsed  1.165408e+02
## 
## $`6`
## 16 x 1 sparse Matrix of class "dgCMatrix"
##                                  1
## (Intercept)           1.333300e+00
## APM                   1.809775e-03
## SelectByHotkeys       8.263233e+01
## AssignToHotkeys       1.465653e+03
## UniqueHotkeys         9.504474e-02
## MinimapAttacks        2.150834e+03
## MinimapRightClicks    .           
## NumberOfPACs          3.184404e+02
## GapBetweenPACs       -2.486925e-02
## ActionLatency        -6.083014e-02
## ActionsInPAC          .           
## TotalMapExplored      .           
## WorkersMade           2.351729e+02
## UniqueUnitsMade       .           
## ComplexUnitsMade      .           
## ComplexAbilitiesUsed  .

(b)

Groups 1, 3, and 4 each have 5 selected variables, group 2 has 7, group 5 has 11, and group 6 has 9. Groups 1, 3, and 4 therefore have the fewest selected variables, and group 5 has the most.

(c)

Most useful: AssignToHotkeys, GapBetweenPACs, and ActionLatency. These three variables are selected in all six groups.

(d)

Least useful: ActionsInPAC, MinimapRightClicks, and UniqueUnitsMade. These three variables are not selected in any group.