Creating a tidy dataset
## Build a tidy dataset of player counts per class/level from the two CSV
## snapshots published on the wiki, drop rows outside the level range or with
## an inconsistent opt-out flag, and export the result as a CSV.

## Download the CSVs provided on the wiki.
## stringsAsFactors = FALSE keeps text columns as character vectors so the
## "wizard" -> "mage" recode below cannot produce NA through a missing factor
## level (read.csv defaulted to factors before R 4.0).
ds1 <- read.csv(
  url("http://oldgods.net/habitrpg/levels_for_classes_2015-10-07.csv"),
  stringsAsFactors = FALSE
)
ds2 <- read.csv(
  url("http://oldgods.net/habitrpg/levels_for_classes_2015-02-12.csv"),
  stringsAsFactors = FALSE
)

## Row bind them to create one dataset
ds3 <- rbind(ds1, ds2)

## Rename columns 1 and 4 for convenience
colnames(ds3)[c(1, 4)] <- c("players", "optout")

## Pick a max level to subset on
maxLevel <- 100

## Rename all instances of "wizard" to "mage" for conformance
ds3$class[ds3$class == "wizard"] <- "mage"

## Split off players below level 10 and above the max level
lost <- subset(ds3, level < 10 | level > maxLevel)
data <- subset(ds3, level >= 10 & level <= maxLevel)

## Remove players that opted out but are not warriors (due to glitch, bugs, etc).
## The mask is computed once and NA comparisons are treated as "not an error
## row", so rows with missing values stay intact in `data` instead of being
## turned into all-NA rows by logical indexing.
isErrorClass <- data$optout == "yes" & data$class != "warrior"
isErrorClass[is.na(isErrorClass)] <- FALSE
errorClasses <- data[isErrorClass, ]
data <- data[!isErrorClass, ]
lost <- rbind(lost, errorClasses)

## Compute stats of players lost.
## The percentage is taken against ALL players in the combined dataset, not
## against the players that remain after filtering.
lostCount <- sum(lost$players)
lostPercent <- round(lostCount / sum(ds3$players) * 100, 2)

## Export tidy CSV
write.csv(data, file = "habaticaClasses.csv", row.names = FALSE)
Preprocessing Summary
- 88,386 Total Players
- 37,833 players removed (42.80% of total)
- 11 from non-warrior opt outs
- 2,475 over level 100
- 35,347 under level 10
- 50,553 Remaining Players