library(ggplot2)
library(dplyr)
library(gridExtra)
library(corrplot)
library(corrgram)
This dataset contains information on player reconnaissance in over 500 professional-level Starcraft games. From the perspective of one player (the Terran), it contains information on how many enemy (Protoss) units the player has observed, can observe, has seen destroyed, etc., along with an overall measure of how much enemy territory the player can see.
Games are divided into 30 second chunks, with the first 7 minutes of each game being represented in this dataset. Values of variables at any given time cycle represent their values over the entire chunk that ends at that time.
# Load the scouting data from the parent directory and inspect its structure.
csv_path <- '../starcraft_scouting.csv'
df <- read.csv(csv_path, sep = ',')
str(df)
## 'data.frame': 227840 obs. of 9 variables:
## $ game : Factor w/ 509 levels "pvt_001","pvt_002",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ cycle : int 720 720 720 720 720 720 720 720 720 720 ...
## $ unit : Factor w/ 32 levels "Protoss Arbiter",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ losses : int 0 0 0 0 0 0 0 0 0 0 ...
## $ observable.units: int 0 0 0 0 0 0 0 0 0 0 ...
## $ observed.losses : int 0 0 0 0 0 0 0 0 0 0 ...
## $ production : int 0 0 0 0 0 0 0 0 0 0 ...
## $ scouting : num 0 0 0 0 0 0 0 0 0 0 ...
## $ vision : num 0 0 0 0 0 0 0 0 0 0 ...
# Number of distinct game identifiers present in the data.
game_ids <- unique(df$game)
length(game_ids)
## [1] 509
# Per-column summary of df (level counts for factors, quantiles/mean for
# numeric columns, plus the NA count where a column has missing values).
summary(df)
## game cycle unit
## pvt_001: 448 Min. : 720 Protoss Arbiter : 7120
## pvt_002: 448 1st Qu.: 2880 Protoss Arbiter Tribunal: 7120
## pvt_004: 448 Median : 5040 Protoss Archon : 7120
## pvt_005: 448 Mean : 5396 Protoss Assimilator : 7120
## pvt_006: 448 3rd Qu.: 7920 Protoss Carrier : 7120
## pvt_008: 448 Max. :10080 Protoss Citadel of Adun : 7120
## (Other):225152 (Other) :185120
## losses observable.units observed.losses production
## Min. : 0.00000 Min. : 0.000 Min. :0.00000 Min. :0.0000
## 1st Qu.: 0.00000 1st Qu.: 0.000 1st Qu.:0.00000 1st Qu.:0.0000
## Median : 0.00000 Median : 0.000 Median :0.00000 Median :0.0000
## Mean : 0.01133 Mean : 0.912 Mean :0.01077 Mean :0.1216
## 3rd Qu.: 0.00000 3rd Qu.: 0.000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :25.00000 Max. :46.000 Max. :9.00000 Max. :8.0000
##
## scouting vision
## Min. : 0.0000 Min. :0.0000
## 1st Qu.: 0.0000 1st Qu.:0.0000
## Median : 0.0000 Median :0.0000
## Mean : 0.1997 Mean :0.1285
## 3rd Qu.: 0.0000 3rd Qu.:0.2244
## Max. :42.0000 Max. :0.9738
## NA's :32
# Keep only the rows that have no NA in any column.
# To inspect the rows being dropped first, run: df[!complete.cases(df), ]
rows_complete <- complete.cases(df)
df <- df[rows_complete, ]
The data cover exactly 509 games, so a first step is to focus on a single game to understand the data. There are also 32 NA’s in the vision feature; the rows containing them are removed above.
# Restrict the data to game pvt_001 and draw a 100-bin histogram of its
# cycle values.
in_first_game <- df$game == 'pvt_001'
pvt_001 <- df[in_first_game, ]
g1 <- ggplot(pvt_001, aes(x = cycle)) +
  geom_histogram(bins = 100)
print(g1)
Each cycle value corresponds to 30 seconds of gameplay, so the 14 entries in the histogram give 30 s × 14 = 420 s = 7 minutes, which matches the 7 minutes of each game covered by the dataset.
# Show the game/cycle/unit/losses columns for the rows of pvt_001 at cycle 720.
at_cycle_720 <- pvt_001$cycle == 720
pvt_001[at_cycle_720, c('game', 'cycle', 'unit', 'losses')]
## game cycle unit losses
## 1 pvt_001 720 Protoss Arbiter 0
## 2 pvt_001 720 Protoss Arbiter Tribunal 0
## 3 pvt_001 720 Protoss Archon 0
## 4 pvt_001 720 Protoss Assimilator 0
## 5 pvt_001 720 Protoss Carrier 0
## 6 pvt_001 720 Protoss Citadel of Adun 0
## 7 pvt_001 720 Protoss Corsair 0
## 8 pvt_001 720 Protoss Cybernetics Core 0
## 9 pvt_001 720 Protoss Dark Archon 0
## 10 pvt_001 720 Protoss Dark Templar 0
## 11 pvt_001 720 Protoss Dragoon 0
## 12 pvt_001 720 Protoss Fleet Beacon 0
## 13 pvt_001 720 Protoss Forge 0
## 14 pvt_001 720 Protoss Gateway 0
## 15 pvt_001 720 Protoss High Templar 0
## 16 pvt_001 720 Protoss Interceptor 0
## 17 pvt_001 720 Protoss Nexus 0
## 18 pvt_001 720 Protoss Observatory 0
## 19 pvt_001 720 Protoss Observer 0
## 20 pvt_001 720 Protoss Photon Cannon 0
## 21 pvt_001 720 Protoss Probe 0
## 22 pvt_001 720 Protoss Pylon 0
## 23 pvt_001 720 Protoss Reaver 0
## 24 pvt_001 720 Protoss Robotics Facility 0
## 25 pvt_001 720 Protoss Robotics Support Bay 0
## 26 pvt_001 720 Protoss Scarab 0
## 27 pvt_001 720 Protoss Scout 0
## 28 pvt_001 720 Protoss Shield Battery 0
## 29 pvt_001 720 Protoss Shuttle 0
## 30 pvt_001 720 Protoss Stargate 0
## 31 pvt_001 720 Protoss Templar Archives 0
## 32 pvt_001 720 Protoss Zealot 0
Each cycle has 32 rows, corresponding to all Protoss units.
I summarized the Protoss unit characteristics, as listed on Wikipedia, in the following table:
# Hand-entered table of Protoss unit characteristics: one row per unit with its
# Psi value, warp cost and gas warp cost.
# (Alternative kept for reference: read the first four columns of
#  'protoss_unit.csv' with read.csv(..., header = FALSE) instead.)
name <- c(
  'Probe', 'Zealot', 'Sentry', 'Stalker', 'High Templar', 'Dark Templar',
  'Immortal', 'Colossus', 'Archon', 'Observer', 'Warp Prism', 'Phoenix',
  'Mothership Core', 'Void Ray', 'Oracle', 'Tempest', 'Carrier',
  'Interceptor', 'Mothership', 'Photon Cannon', 'Nexus'
)
# Psi is NA for the last two entries (Photon Cannon, Nexus).
psi <- c(
  1, 2, 2, 2, 2, 2, 4, 6, 4, 1, 2,
  2, 2, 4, 3, 4, 6, 0, 8, NA, NA
)
wc <- c(
  50, 100, 50, 125, 50, 125, 250, 300, 0, 25, 200,
  50, 100, 250, 150, 300, 350, 25, 300, 150, 400
)
gwc <- c(
  0, 0, 100, 50, 150, 125, 100, 200, 0, 75, 0,
  100, 100, 150, 150, 200, 250, 0, 300, 0, 0
)
# Assemble the table and give the columns their display names in one step.
protossChar <- setNames(
  data.frame(name, psi, wc, gwc),
  c('Name', 'Psi', 'Warp Cost', 'Gas Warp Cost')
)
print(protossChar)
## Name Psi Warp Cost Gas Warp Cost
## 1 Probe 1 50 0
## 2 Zealot 2 100 0
## 3 Sentry 2 50 100
## 4 Stalker 2 125 50
## 5 High Templar 2 50 150
## 6 Dark Templar 2 125 125
## 7 Immortal 4 250 100
## 8 Colossus 6 300 200
## 9 Archon 4 0 0
## 10 Observer 1 25 75
## 11 Warp Prism 2 200 0
## 12 Phoenix 2 50 100
## 13 Mothership Core 2 100 100
## 14 Void Ray 4 250 150
## 15 Oracle 3 150 150
## 16 Tempest 4 300 200
## 17 Carrier 6 350 250
## 18 Interceptor 0 25 0
## 19 Mothership 8 300 300
## 20 Photon Cannon NA 150 0
## 21 Nexus NA 400 0
We can select two units that differ markedly in terms of cost. I guess Protoss Probes are like minions, meaning they are easy/quick to produce, whereas a larger unit like a Nexus may require more time.
# Scatter plots of observable.units and production against cycle for the
# 'Protoss Nexus' and 'Protoss Probe' rows of pvt_001, shown side by side.
# The two-unit subset is built once and shared by both plots; %in% is used so
# a missing unit value can never produce an all-NA row in the subset.
units_of_interest <- c('Protoss Nexus', 'Protoss Probe')
pvt_001_units <- pvt_001[pvt_001$unit %in% units_of_interest, ]

# Same legend placement for both panels.
legend_inside <- theme(legend.position = c(.2, .85))

unitObservables <- ggplot(pvt_001_units, aes(x = cycle, y = observable.units)) +
  geom_point(aes(color = factor(unit)), size = 3) +
  legend_inside
unitProduction <- ggplot(pvt_001_units, aes(x = cycle, y = production)) +
  geom_point(aes(color = factor(unit)), size = 3) +
  legend_inside

grid.arrange(unitObservables, unitProduction, ncol = 2)
Comments :
Next steps are :
# Take the 'Protoss Probe' rows from every game and draw, for each cycle
# (one facet per cycle value), bar charts of the observable.units and
# observed.losses counts, outlined in blue and red respectively.
is_probe <- df$unit == 'Protoss Probe'
prDf <- df[is_probe, ]
probe_outline <- c(observable.units = "blue", observed.losses = "red")
g1 <- ggplot() +
  geom_bar(data = prDf, aes(x = observable.units, color = 'observable.units')) +
  geom_bar(data = prDf, aes(x = observed.losses, color = 'observed.losses')) +
  facet_wrap(~cycle) +
  scale_colour_manual(name = "Protoss Probe", values = probe_outline)
print(g1)
Comments :
For a player :
# Correlation matrix of the numeric columns of prDf, drawn as ellipses.
# vapply() guarantees a logical vector whatever prDf contains (sapply() can
# silently return a list), and drop = FALSE keeps the selection a data frame
# even if only one numeric column is present, so cor() always gets a
# matrix-like input.
num.cols <- vapply(prDf, is.numeric, logical(1))
cor.data <- cor(prDf[, num.cols, drop = FALSE])
corrPLOT <- corrplot(cor.data, method = 'ellipse')
Comments : the correlation matrix confirms what we’ve seen in the plot above.
# Take the 'Protoss Nexus' rows from every game and draw, for each cycle
# (one facet per cycle value), bar charts of the observable.units and
# observed.losses counts, outlined in blue and red respectively.
is_nexus <- df$unit == 'Protoss Nexus'
nxDf <- df[is_nexus, ]
nexus_outline <- c(observable.units = "blue", observed.losses = "red")
g2 <- ggplot() +
  geom_bar(data = nxDf, aes(x = observable.units, color = 'observable.units')) +
  geom_bar(data = nxDf, aes(x = observed.losses, color = 'observed.losses')) +
  facet_wrap(~cycle) +
  scale_colour_manual(name = "Protoss Nexus", values = nexus_outline)
print(g2)
# Correlation matrix of the numeric columns of nxDf, drawn as ellipses.
# vapply() guarantees a logical vector whatever nxDf contains (sapply() can
# silently return a list), and drop = FALSE keeps the selection a data frame
# even if only one numeric column is present, so cor() always gets a
# matrix-like input.
num.cols <- vapply(nxDf, is.numeric, logical(1))
cor.data <- cor(nxDf[, num.cols, drop = FALSE])
corrPLOT <- corrplot(cor.data, method = 'ellipse')