Creating a tidy dataset

## Download the CSVs provided on the wiki.
## stringsAsFactors = FALSE keeps text columns as character vectors on every R
## version (factors were the default before R 4.0.0), so later string
## replacements cannot hit an "invalid factor level" and turn values into NA.
ds1 <- read.csv(
  url("http://oldgods.net/habitrpg/levels_for_classes_2015-10-07.csv"),
  stringsAsFactors = FALSE
)
ds2 <- read.csv(
  url("http://oldgods.net/habitrpg/levels_for_classes_2015-02-12.csv"),
  stringsAsFactors = FALSE
)

## Row bind them to create one dataset (rbind matches columns by name, so both
## files must share the same column names)
ds3 <- rbind(ds1, ds2)

## Rename columns for convenience
## NOTE(review): assumes column 1 holds the player count and column 4 the
## opt-out flag -- confirm against the CSV headers.
colnames(ds3)[c(1, 4)] <- c("players", "optout")

## Pick the inclusive level bounds to subset on
minLevel <- 10
maxLevel <- 100

## Rename all instances of "wizard" to "mage" for conformance
## (%in% never yields NA, so missing class values are left untouched)
ds3$class[ds3$class %in% "wizard"] <- "mage"

## Remove players below the min level and above the max level
lost <- subset(ds3, level < minLevel | level > maxLevel)

data <- subset(ds3, level >= minLevel & level <= maxLevel)

## Remove players that opted out but are not warriors (due to glitch, bugs, etc)
## The mask is built once and reused for both the removed and the kept rows.
## NA comparisons count as "not an error row": indexing with an NA mask would
## otherwise insert all-NA rows into the result.
isErrorClass <- data$optout == "yes" & data$class != "warrior"
isErrorClass[is.na(isErrorClass)] <- FALSE
errorClasses <- data[isErrorClass, ]
data <- data[!isErrorClass, ]
lost <- rbind(lost, errorClasses)

## Compute stats of players lost
lostCount <- sum(lost$players)
## Percentage of ALL players (the pre-filter total in ds3), not of the players
## that remain after filtering
lostPercent <- round(lostCount / sum(ds3$players) * 100, 2)

## Write the tidy dataset to a CSV file in the working directory, without the
## row-name column.
## NOTE(review): the file name spells "habatica"; it is kept as-is because
## other code may read the file by this exact name.
write.csv(
  data,
  file = "habaticaClasses.csv",
  row.names = FALSE
)

Preprocessing Summary

  • 88,386 Total Players
  • 37,833 players removed (42.80% of total; 74.84% relative to the 50,553 remaining players)
    • 11 from non-warrior opt outs
    • 2,475 over level 100
    • 35,347 under level 10
  • 50,553 Remaining Players