Description

Colorado Parks and Wildlife (CPW), formerly the Colorado Division of Wildlife (CDOW), provides historical elk herd population estimates. In recent years they have also provided the ratio of bulls to cows.

Each estimated herd spans multiple Game Management Units (Units), which are the areas that hunting licenses are issued for. The herds are labeled by Data Analysis Unit (DAU), and the set of Units in some DAUs changes slightly from year to year. In our simplest case we will split each herd's estimated population evenly across its Units.

# Work from the folder that holds the downloaded CPW files; the PDFs read
# later are referenced by relative file name.
# NOTE(review): this path is specific to the author's machine — adjust it
# before running elsewhere.
setwd("~/_code/colorado-dow/datasets")

Load required libraries for acquiring data from pdf

library(pdftools,quietly = T)
library(stringr,quietly = T)
library(plyr,quietly = T)
library(dplyr,quietly = T)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr,quietly = T)

# Years for which CPW publishes the population table in this PDF layout
# (2005 through 2016, kept as a double vector)
years <- as.numeric(2005:2016)

Loop through years

# Build one long table of elk population estimates with one row per
# (year, DAU, Unit). For every year the first page of that year's PDF is
# turned into text lines, the title/header/footer lines are stripped, the
# remaining lines are split into columns on runs of two or more spaces, and
# the herd estimate is divided evenly over the Units listed for the herd.
COElkPopulationAll <- NULL # Initialize; each year's table is appended below
for (iyear in years) {

  # RUN ONCE to download files
  # (the remote file name ends in "Estimates.pdf" from 2015 on, "Estimate.pdf" before)
  # if (iyear >= 2015) {
  #   download.file(paste("http://cpw.state.co.us/Documents/Hunting/BigGame/Statistics/Elk/",
  #                       iyear,"ElkPopulationEstimates.pdf",sep=""),
  #                 paste(iyear,"COElkPopulation",sep=""))
  # } else {
  #   download.file(paste("http://cpw.state.co.us/Documents/Hunting/BigGame/Statistics/Elk/",
  #                       iyear,"ElkPopulationEstimate.pdf",sep=""),
  #                 paste(iyear,"COElkPopulation",sep=""))
  # }

  # pdf_text() exports the raw text as a character vector, with spaces showing
  # the white space and \n showing the line breaks.
  pdffile <- paste0(iyear, "COElkPopulation")
  COElkPop <- pdf_text(pdffile)

  # A full page in one string is not practical, so split it into lines.
  # Only the first page of the document is parsed.
  COElkPopa <- strsplit(COElkPop, "\n")
  COElkPopa <- COElkPopa[[1]]

  # Drop everything up to and including the "<year> POST HUNT POPULATION" title
  removeheaderrows <- grep(paste(iyear, "POST HUNT POPULATION"), COElkPopa)
  if (length(removeheaderrows) == 0) {
    stop("No 'POST HUNT POPULATION' title line found in ", pdffile,
         call. = FALSE)
  }
  COElkPopb <- COElkPopa[-seq_len(removeheaderrows[1])]

  # Drop the "Post Hunt" header line(s). A logical mask is used on purpose:
  # x[-integer(0)] would throw away every line if nothing matched.
  COElkPopb <- COElkPopb[!grepl("Post Hunt", COElkPopb)]

  # Drop the first "Total" line and everything after it
  removefooterrows <- grep("Total", COElkPopb)
  if (length(removefooterrows) == 0) {
    stop("No 'Total' footer line found in ", pdffile, call. = FALSE)
  }
  COElkPopb <- COElkPopb[seq_len(removefooterrows[1] - 1)]

  # Determine the column names from the header lines.
  # NOTE(review): in base R regular expressions a POSIX class has to sit inside
  # a bracket expression ("[[:punct:]]"); as written, "[:punct:]" looks like a
  # plain character set (':', 'p', 'u', 'n', 'c', 't'). The pattern is left
  # untouched because the rest of the loop relies on the lines it currently
  # selects — confirm the intended match before changing it.
  columnnames <- grep("[:punct:]", COElkPopb)
  if (length(columnnames) == 0) {
    stop("No column header line found in ", pdffile, call. = FALSE)
  }
  columnnames1 <- COElkPopb[columnnames]
  columnnames1 <- columnnames1[length(columnnames1)] # use the last header line
  columnnames2 <- str_trim(columnnames1) # remove any extra whitespace
  columnnames3 <- unlist(strsplit(columnnames2, split = "\\s{2,}")) # split on two or more white spaces
  columnnames3[grep("DAU", columnnames3)] <- "DAU" # clean up DAU label
  columnnames3[grep("MANAGEMENT", columnnames3)] <- "GMUnits" # clean up GMUnits label
  # Some years have an extra fourth column; only then is it labelled.
  # (The comparison must be on the length itself: length(x == 4) is non-zero
  # for any non-empty x and used to add a phantom column to every year.)
  # Years without the column get NA for Bull_Ratio from rbind.fill() below.
  if (length(columnnames3) == 4) {
    columnnames3[4] <- "Bull_Ratio"
  }

  # Everything that is not a header line is a data line
  COElkPopb1 <- COElkPopb[-columnnames]
  COElkPopb1 <- str_trim(COElkPopb1) # remove extra whitespace

  # Now that it is cleaned up, use the white space to separate into columns
  COElkPopb2 <- str_split_fixed(COElkPopb1, pattern = "\\s{2,}",
                                n = length(columnnames3))

  COElkPopb2 <- as.data.frame(COElkPopb2) # and convert into a dataframe
  colnames(COElkPopb2) <- columnnames3 # apply our column names

  # Remove the thousands separators so the estimate can be used as a number.
  # (relies on the header providing a column named "Estimate")
  COElkPopb2$Estimate <- as.numeric(gsub(",", "", COElkPopb2$Estimate))

  # We have been operating by Units, so divide the population estimate across
  # the Units the herd is in, assuming the herd is evenly distributed.
  # The number of Units is the number of commas in the list plus one.
  COElkPopb2$Num_GMUnits <- str_count(as.character(COElkPopb2$GMUnits), pattern = ",") + 1
  COElkPopb2$Population.Unit <- COElkPopb2$Estimate / COElkPopb2$Num_GMUnits

  # Get the GMUnits out: spread the comma separated list over 26 helper
  # columns (A..Z), stack those columns into a single Unit column and drop the
  # NA padding left by herds that have fewer than 26 Units.
  COElkPopb3 <- separate(COElkPopb2, GMUnits, into = LETTERS, sep = ",")
  COElkPopb3 <- gather(COElkPopb3, "ignore", Unit, A:Z)
  COElkPopb3 <- select(COElkPopb3, -ignore)
  COElkPopb3 <- filter(COElkPopb3, !is.na(Unit))
  COElkPopb3$Unit <- str_trim(COElkPopb3$Unit) # remove extra whitespace

  colnames(COElkPopb3)[colnames(COElkPopb3) == "Estimate"] <- "Population.DAU" # change label for clarification
  COElkPopb3$Year <- as.character(iyear)

  # rbind.fill() pads columns that are missing in either table with NA
  COElkPopulationAll <- rbind.fill(COElkPopulationAll, COElkPopb3)

}
## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 46 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 46 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 46 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 46 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 46 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 43 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 43 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 43 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 43 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 43 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 43 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 43 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

Clean up the footnote markers attached to some of the Unit ids

# Strip the footnote markers from the Unit ids: every letter, asterisk and
# parenthesis is removed in one pass. str_remove_all() is used because
# str_remove() only deletes the first match in each string, which would leave
# part of a multi-character marker behind.
COElkPopulationAll$Unit <- str_remove_all(
  COElkPopulationAll$Unit,
  "[[:alpha:]\\*\\(\\)]"
)

Take a peek at the data frame

# Preview the first six rows of the combined table
head(COElkPopulationAll, n = 6L)
##   DAU Population.DAU Bull_Ratio Num_GMUnits Population.Unit Unit Year
## 1   1           2530                      2        1265.000    2 2005
## 2   2          16100                      7        2300.000    3 2005
## 3   3           8170                      5        1634.000    6 2005
## 4   4           3810                      5         762.000    7 2005
## 5   6          37930                     12        3160.833   11 2005
## 6   7           4500                      2        2250.000   15 2005