library(ggplot2)
library(corrplot)
library(corrgram)
library(gridExtra)
This dataset aggregates screen fixations derived from the screen movements recorded in StarCraft 2 replay files. We start with a quick look at the data file to detect any outliers and/or NA values.
Questions worth exploring:
# Load the replay statistics and show the type of every column.
df <- read.csv(file = '../../SC2/starcraft.csv', sep = ',')
str(object = df)
## 'data.frame': 3395 obs. of 21 variables:
## $ GameID : int 52 55 56 57 58 60 61 72 77 81 ...
## $ LeagueIndex : int 5 5 4 3 3 2 1 7 4 4 ...
## $ Age : int 27 23 30 19 32 27 21 17 20 18 ...
## $ HoursPerWeek : int 10 10 10 20 10 6 8 42 14 24 ...
## $ TotalHours : int 3000 5000 200 400 500 70 240 10000 2708 800 ...
## $ APM : num 144 129 70 108 123 ...
## $ SelectByHotkeys : num 0.00352 0.0033 0.0011 0.00103 0.00114 ...
## $ AssignToHotkeys : num 0.00022 0.000259 0.000336 0.000213 0.000327 ...
## $ UniqueHotkeys : num 5.49e-05 6.92e-05 4.19e-05 1.07e-05 3.85e-05 2.13e-05 6.74e-05 5.97e-05 1.88e-05 9.98e-05 ...
## $ MinimapAttacks : num 1.10e-04 2.94e-04 2.94e-04 5.33e-05 0.00 ...
## $ MinimapRightClicks: num 0.000392 0.000432 0.000461 0.000543 0.001329 ...
## $ NumberOfPACs : num 0.00485 0.00431 0.00293 0.00378 0.00237 ...
## $ GapBetweenPACs : num 32.7 32.9 44.6 29.2 22.7 ...
## $ ActionLatency : num 40.9 42.3 75.4 53.7 62.1 ...
## $ ActionsInPAC : num 4.75 4.84 4.04 4.92 9.37 ...
## $ TotalMapExplored : num 0.00022 0.000381 0.000231 0.000202 0.000289 ...
## $ WorkersMade : num 0.001397 0.001193 0.000745 0.000426 0.001174 ...
## $ UniqueUnitsMade : num 4.71e-05 8.65e-05 6.29e-05 7.46e-05 7.70e-05 6.38e-05 5.62e-05 8.95e-05 6.58e-05 7.49e-05 ...
## $ ComplexUnitsMade : num 0 0 0 0 0 ...
## $ ComplexAbilityUsed: num 0.00 2.08e-04 1.89e-04 3.84e-04 1.93e-05 ...
## $ MaxTimeStamp : int 127448 57812 95360 93852 51936 94032 89012 100556 106308 80136 ...
# Per-column summary statistics; the NA counts appear here as well.
summary(object = df)
## GameID LeagueIndex Age HoursPerWeek
## Min. : 52 Min. :1.000 Min. :16.00 Min. : 0.00
## 1st Qu.: 2464 1st Qu.:3.000 1st Qu.:19.00 1st Qu.: 8.00
## Median : 4874 Median :4.000 Median :21.00 Median : 12.00
## Mean : 4805 Mean :4.184 Mean :21.65 Mean : 15.91
## 3rd Qu.: 7108 3rd Qu.:5.000 3rd Qu.:24.00 3rd Qu.: 20.00
## Max. :10095 Max. :8.000 Max. :44.00 Max. :168.00
## NA's :55 NA's :56
## TotalHours APM SelectByHotkeys
## Min. : 3.0 Min. : 22.06 Min. :0.000000
## 1st Qu.: 300.0 1st Qu.: 79.90 1st Qu.:0.001258
## Median : 500.0 Median :108.01 Median :0.002500
## Mean : 960.4 Mean :117.05 Mean :0.004299
## 3rd Qu.: 800.0 3rd Qu.:142.79 3rd Qu.:0.005133
## Max. :1000000.0 Max. :389.83 Max. :0.043088
## NA's :57
## AssignToHotkeys UniqueHotkeys MinimapAttacks
## Min. :0.0000000 Min. :0.000e+00 Min. :0.000e+00
## 1st Qu.:0.0002042 1st Qu.:3.275e-05 1st Qu.:0.000e+00
## Median :0.0003526 Median :5.340e-05 Median :3.990e-05
## Mean :0.0003736 Mean :5.873e-05 Mean :9.831e-05
## 3rd Qu.:0.0004988 3rd Qu.:7.865e-05 3rd Qu.:1.189e-04
## Max. :0.0017522 Max. :3.376e-04 Max. :3.019e-03
##
## MinimapRightClicks NumberOfPACs GapBetweenPACs ActionLatency
## Min. :0.0000000 Min. :0.000679 Min. : 6.667 Min. : 24.09
## 1st Qu.:0.0001401 1st Qu.:0.002754 1st Qu.: 28.958 1st Qu.: 50.45
## Median :0.0002815 Median :0.003395 Median : 36.724 Median : 60.93
## Mean :0.0003874 Mean :0.003463 Mean : 40.362 Mean : 63.74
## 3rd Qu.:0.0005141 3rd Qu.:0.004027 3rd Qu.: 48.291 3rd Qu.: 73.68
## Max. :0.0040408 Max. :0.007971 Max. :237.143 Max. :176.37
##
## ActionsInPAC TotalMapExplored WorkersMade
## Min. : 2.039 Min. :0.0000913 Min. :0.0000770
## 1st Qu.: 4.273 1st Qu.:0.0002244 1st Qu.:0.0006830
## Median : 5.096 Median :0.0002695 Median :0.0009052
## Mean : 5.273 Mean :0.0002825 Mean :0.0010317
## 3rd Qu.: 6.034 3rd Qu.:0.0003253 3rd Qu.:0.0012587
## Max. :18.558 Max. :0.0008319 Max. :0.0051493
##
## UniqueUnitsMade ComplexUnitsMade ComplexAbilityUsed
## Min. :1.970e-05 Min. :0.000e+00 Min. :0.0000000
## 1st Qu.:6.780e-05 1st Qu.:0.000e+00 1st Qu.:0.0000000
## Median :8.220e-05 Median :0.000e+00 Median :0.0000203
## Mean :8.455e-05 Mean :5.943e-05 Mean :0.0001419
## 3rd Qu.:9.860e-05 3rd Qu.:8.555e-05 3rd Qu.:0.0001814
## Max. :2.019e-04 Max. :9.023e-04 Max. :0.0030837
##
## MaxTimeStamp
## Min. : 25224
## 1st Qu.: 60090
## Median : 81012
## Mean : 83598
## 3rd Qu.:102074
## Max. :388032
##
# Does the data frame contain at least one missing value?
any(is.na(df))
## [1] TRUE
For a first look at the data I will simply remove the rows containing NAs and the outlier (the record with a TotalHours of 1,000,000). Later I will impute the missing data properly.
# First pass at cleaning: keep complete rows only, then discard the record
# whose TotalHours equals 1000000.
df <- na.omit(df)
df <- df[!(df$TotalHours == 1000000), ]
# Histogram of TotalHours after the clean-up, using 100 bins.
totHoursCleaned <- ggplot(df, aes(TotalHours)) +
  geom_histogram(bins = 100)
print(totHoursCleaned)
Some attributes (features) may be correlated with one another, so it’s worth looking at the correlation matrix.
# Logical mask of the numeric columns. vapply() pins the result to one
# logical per column, whereas sapply() may change its return type with the
# shape of its input.
num.cols <- vapply(df, is.numeric, logical(1))
# Pairwise correlations between the numeric columns.
cor.data <- cor(df[, num.cols])
# Draw the matrix as ellipses; the value returned by corrplot() is kept in
# corrPLOT.
corrPLOT <- corrplot(cor.data, method = 'ellipse')
#' Translate a league index into its league name.
#'
#' Vectorised lookup: accepts a single index or a whole vector of indices and
#' returns one name per element, so it works both when called element by
#' element (e.g. through `sapply()`) and when called directly on a column.
#'
#' @param x League index, or a vector of league indices. The values 1 to 7
#'   map to 'Bronze', 'Silver', 'Gold', 'Platinum', 'Diamond', 'Master' and
#'   'Grand Master'.
#' @return A character vector the same length as `x`. Any non-missing value
#'   outside 1 to 7 yields 'OTHER' (the data summary shows a maximum index of
#'   8, which therefore falls into this bucket). A missing index yields `NA`
#'   instead of raising an error.
league <- function(x) {
  league.names <- c('Bronze', 'Silver', 'Gold', 'Platinum',
                    'Diamond', 'Master', 'Grand Master')
  # match() gives the position of x among 1..7, or NA when there is none.
  out <- league.names[match(x, seq_along(league.names))]
  # Unmatched but non-missing indices keep the original catch-all label.
  out[is.na(out) & !is.na(x)] <- 'OTHER'
  out
}
# Attach the league name to every row. vapply() guarantees a character
# vector, whereas sapply() returns an empty list for zero-length input.
df$LeagueName <- vapply(df$LeagueIndex, league, character(1))
Action Latency and Gap Between PACs show a very strong correlation, so we can start by looking at these two features.
# Action latency by league, with one box per age inside each league.
actionLatVsLeague <- ggplot(df, aes(factor(LeagueIndex), ActionLatency)) +
  geom_boxplot(aes(fill = factor(Age))) +
  labs(x = 'League Index', y = 'Action Latency') +
  theme(legend.position = c(.9, .65))
print(actionLatVsLeague)

# Gap between PACs by league, same layout as the plot above.
gapPacVsLeague <- ggplot(df, aes(factor(LeagueIndex), GapBetweenPACs)) +
  geom_boxplot(aes(fill = factor(Age))) +
  labs(x = 'League Index', y = 'Gap Between PACs') +
  theme(legend.position = c(.9, .65))
print(gapPacVsLeague)
Comments :
# Weekly playing time by league, with one box per age inside each league.
hourWeekVsLeague <- ggplot(df, aes(factor(LeagueIndex), HoursPerWeek)) +
  geom_boxplot(aes(fill = factor(Age))) +
  labs(x = 'League Index', y = 'HoursPerWeek') +
  theme(legend.position = c(.9, .65))
print(hourWeekVsLeague)

# Total playing time by league, restricted to rows with TotalHours below 5000.
totHoursVsLeague <- ggplot(
  df[df$TotalHours < 5000, ],
  aes(factor(LeagueIndex), TotalHours)
) +
  geom_boxplot(aes(fill = factor(Age))) +
  labs(x = 'League Index', y = 'Total Hours') +
  theme(legend.position = c(.9, .65))
print(totHoursVsLeague)
Comments :
## GameID LeagueIndex Age HoursPerWeek TotalHours APM
## 8 72 7 17 42 10000 212.6022
## 11 83 3 16 16 6000 153.8010
## 771 2246 5 22 16 20000 248.0490
## 1979 5610 4 22 10 18000 152.2374
## 2141 6020 5 22 10 9000 106.0056
## 2217 6242 3 24 20 10260 76.5852
## 2325 6518 6 20 8 25000 247.0164
## 3254 9055 3 19 20 6000 102.0114
## SelectByHotkeys AssignToHotkeys UniqueHotkeys MinimapAttacks
## 8 0.009039739 0.000676240 0.000059700 0.001163531
## 11 0.001676615 0.000318557 0.000067100 0.000000000
## 771 0.023703208 0.000390712 0.000130237 0.000000000
## 1979 0.011983053 0.000205514 0.000015800 0.000015800
## 2141 0.003568862 0.000634731 0.000095800 0.000946108
## 2217 0.000779779 0.000196701 0.000000000 0.000063200
## 2325 0.015793839 0.000438389 0.000094800 0.000308057
## 3254 0.002045134 0.000317348 0.000044100 0.000044100
## MinimapRightClicks NumberOfPACs GapBetweenPACs ActionLatency
## 8 0.001253033 0.004952464 24.6117 41.7671
## 11 0.000821541 0.003772383 23.4107 48.0711
## 771 0.000204659 0.004651336 37.8795 45.3760
## 1979 0.000363602 0.003351461 52.1896 63.9811
## 2141 0.000574850 0.003616766 28.6645 55.9603
## 2217 0.000316127 0.002437688 42.9480 84.6340
## 2325 0.001338863 0.004644550 17.6471 37.1837
## 3254 0.000555360 0.003032440 62.5423 67.3140
## ActionsInPAC TotalMapExplored WorkersMade UniqueUnitsMade
## 8 6.6104 0.000447512 0.00227730 0.000089500
## 11 7.0044 0.000402387 0.00159280 0.000117363
## 771 4.7560 0.000390712 0.00152560 0.000111632
## 1979 4.9575 0.000300367 0.00067978 0.000079000
## 2141 4.6159 0.000299401 0.00101800 0.000095800
## 2217 5.9107 0.000189676 0.00044960 0.000070300
## 2325 6.5944 0.000343602 0.00186020 0.000071100
## 3254 6.3605 0.000211566 0.00141040 0.000052900
## ComplexUnitsMade ComplexAbilityUsed MaxTimeStamp LeagueName
## 8 0.000129281 0.00024862 100556 Grand Master
## 11 0.000000000 0.00001680 59644 Gold
## 771 0.000000000 0.00000000 53748 Diamond
## 1979 0.000000000 0.00000000 63256 Platinum
## 2141 0.000000000 0.00015569 83500 Diamond
## 2217 0.000245876 0.00035828 142348 Gold
## 2325 0.000000000 0.00001180 84400 Master
## 3254 0.000238011 0.00194820 113440 Gold
Next I will look at some features related to the gameplay of SC2 itself, namely the number of workers, unique units and complex units made per timestamp:
The plots are still drawn against the LeagueIndex, to see the impact of a player’s gameplay and experience within a given league.
# Workers made by league, with one box per age inside each league.
workersPerLeague <- ggplot(df, aes(factor(LeagueIndex), WorkersMade)) +
  geom_boxplot(aes(fill = factor(Age))) +
  labs(x = 'League Index', y = 'Workers Made') +
  theme(legend.position = c(.9, .65))
print(workersPerLeague)

# Unique units made by league.
uniqueUnitsMadeVsLeague <- ggplot(
  df,
  aes(factor(LeagueIndex), UniqueUnitsMade)
) +
  geom_boxplot(aes(fill = factor(Age))) +
  labs(x = 'League Index', y = 'Unique Units Made') +
  theme(legend.position = c(.9, .65))
print(uniqueUnitsMadeVsLeague)

# Complex units made by league.
complexUnitsMadeVsLeague <- ggplot(
  df,
  aes(factor(LeagueIndex), ComplexUnitsMade)
) +
  geom_boxplot(aes(fill = factor(Age))) +
  labs(x = 'League Index', y = 'Complex Units Made') +
  theme(legend.position = c(.9, .65))
print(complexUnitsMadeVsLeague)
Comments :
Comments
This outlier can be seen directly by plotting the distribution of TotalHours: