Download and Read PDF Draw Summaries from Colorado CPW

Description

Colorado Parks and Wildlife (CPW) aka Colorado Department of Wildlife (CDOW) provides historical statistics on Big Game Draw Results. The ‘Draw’ is the application process that CPW utilizes. Altogether it is rather complex when you consider all of the possible options, but here we will initially limit the analysis to the general hunt seasons, and combine the hunter types (Resident, nonResident, Youth). Hunters apply using hunt codes in the Spring, and CPW posts the results of the Draw in early summer for fall hunts.

setwd("~/_code/colorado-dow/datasets")
library(dplyr,quietly = T)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr,quietly = T)
library(pdftools,quietly = T)
library(stringr,quietly = T)

# Identify the years that CPW will provide tables for in this pdf format
years <- c(2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018)

Loop through years

COElkDrawAll <- NULL # Initialize
for (iyear in years) {
  
  # RUN ONCE to download
  # if (iyear >= 2015) {
  #   download.file(paste("http://cpw.state.co.us/Documents/Hunting/BigGame/Statistics/Elk/",
  #                       iyear,"ElkDrawRecap.pdf",sep=""),
  #                 paste(iyear,"COElkDraw",sep=""))
  # } else {
  #   download.file(paste("http://cpw.state.co.us/Documents/Hunting/BigGame/Statistics/Elk/",
  #                       iyear,"ElkDrawSummary.pdf",sep=""),
  #                 paste(iyear,"COElkDraw",sep=""))
  # }
  
  # This function will directly export the raw text in a character vector with spaces to show 
  # the white space and \n to show the line breaks.
  COElkDraw <- pdf_text(paste(iyear,"COElkDraw",sep=""))
  
  # Having a full page in one element of a vector is not the most practical. Using strsplit 
  # will help separate lines from each other
  COElkDraw <- strsplit(COElkDraw, "\n")

  if (iyear >= 2015) { #different table format for recent years
    #remove the first two summary pages
    COElkDrawa <- COElkDraw[-1:-2]

    # unlist page elements
    COElkDraw1 <- unlist(COElkDrawa)
    COElkDraw1 <- str_trim(COElkDraw1)
    # remove rows with
    removerows <- c(grep("Colorado Parks", COElkDraw1), 
                      grep("Primary Elk Draw", COElkDraw1), 
                      grep("Youth Preference", COElkDraw1), 
                      grep("Landowner Leftover", COElkDraw1),
                      grep("Draw Recap", COElkDraw1),
                    grep("Determined", COElkDraw1),
                    grep("by Draw", COElkDraw1),
                    grep("Successful", COElkDraw1),
                    grep("Drawn Out At", COElkDraw1)
                    )
    COElkDraw2 <- COElkDraw1[-removerows]
    
    # index of rows that we are interested in, based on unique text of nearby fields
    HuntCode<- grep("Total Quota", COElkDraw2)-1 #hunt code is one row above this on each page
    TotalQuota <- grep("Total Quota", COElkDraw2)+3 #quota is 3 rows below
    TotalChoice1 <- grep("General Apps", COElkDraw2)+1 #choice 1 total is one row below
    NumDrawn <- grep("#DrawnHuntCode", gsub(" ", "", COElkDraw2, fixed = TRUE))+2 #drawn is 2 rows below

    # put data we are intested in into a dataframe
    COElkDraw4 <- NULL
    COElkDraw4$HuntCode <- COElkDraw2[HuntCode]
    COElkDraw4 <- as.data.frame(COElkDraw4)
    COElkDraw4$Orig_Quota <- COElkDraw2[TotalQuota]
    COElkDraw4$Orig_Quota <- as.numeric(sub(" .*$", "", COElkDraw4$Orig_Quota))
    COElkDraw4$Ttl_Chce_1 <- COElkDraw2[TotalChoice1]
    COElkDraw4$Ttl_Chce_1 <- str_trim(gsub(pattern = "(.*Total Choice 1)(.*)",replacement = "\\2",x = COElkDraw4$Ttl_Chce_1))
    COElkDraw4$Ttl_Chce_1 <- as.numeric(sub(" .*$", "", COElkDraw4$Ttl_Chce_1))
    COElkDraw4$Chcs_Drawn <- COElkDraw2[NumDrawn]
    COElkDraw4$Chcs_Drawn <- as.numeric(sub(" .*$", "", COElkDraw4$Chcs_Drawn))
    
    # some of the sheets combine hunt codes, here we will duplicate stats on each of them
    multihuntcodes <- filter(COElkDraw4, str_count(HuntCode) > 8)
    multihuntcodes <- separate(multihuntcodes, HuntCode, sep = " ",LETTERS)
    multihuntcodes <- gather(multihuntcodes,"ignore",HuntCode,A:Z)
    multihuntcodes <- select(multihuntcodes, -ignore)
    multihuntcodes <- filter(multihuntcodes, !is.na(HuntCode))
    COElkDraw4 <- filter(COElkDraw4, str_count(HuntCode) <= 8)
    COElkDraw4 <- rbind(COElkDraw4,multihuntcodes)
    
  } else {
    # unlist page elements
    COElkDraw1 <- unlist(COElkDraw)
    # remove page headings
    pageheadings <- c(grep("Date", COElkDraw1), grep("Time", COElkDraw1), grep("Elk", COElkDraw1), grep("HntCde", COElkDraw1))
    # drop all rows with the page heading
    COElkDraw2 <- COElkDraw1[-pageheadings]
    # identify the rows we are interested in
    rowsofinterest <- grep("Orig Quota|Chcs Drawn|Choice 1 % Success", COElkDraw2)
    COElkDraw3 <- COElkDraw2[rowsofinterest]
    COElkDraw3 <- str_trim(COElkDraw3)
    
    # put data we are intested in into a dataframe
    COElkDraw3a <- as.data.frame(COElkDraw3)
    colnames(COElkDraw3a) <- "FullString"
    
    #Hunt Code
    COElkDraw3a$HuntCode <- FALSE #initialize
    COElkDraw3a$HuntCode[grep("E", COElkDraw3a$FullString)] <- TRUE #which rows have the code?
    
    COElkDraw3a$HuntCode2 <- substring(COElkDraw3a$FullString,1,8) #grab the code from those rows
    COElkDraw3a$HuntCode[COElkDraw3a$HuntCode==TRUE] <- COElkDraw3a$HuntCode2[COElkDraw3a$HuntCode==TRUE]
    
    COElkDraw3a$Ttl_Chce_1 <- FALSE
    COElkDraw3a$Ttl_Chce_1 <- grepl("Ttl Chce 1",COElkDraw3a$FullString)
    COElkDraw3a$Ttl_Chce_1a <- "NA"
    COElkDraw3a$Ttl_Chce_1a[COElkDraw3a$Ttl_Chce_1==TRUE] <- str_trim(gsub(pattern = "(.*Ttl Chce 1)(.*)( |.*)",
                                                                           replacement = "\\2",
                                                                           x = COElkDraw3a$FullString))[COElkDraw3a$Ttl_Chce_1==TRUE]
    COElkDraw3a$Ttl_Chce_1a[COElkDraw3a$Ttl_Chce_1==TRUE] <- sub(" .*$", "", COElkDraw3a$Ttl_Chce_1a)[COElkDraw3a$Ttl_Chce_1==TRUE]# deletes everything after first space
    
    COElkDraw3a$Orig_Quota <- FALSE
    COElkDraw3a$Orig_Quota <- grepl("Orig Quota",COElkDraw3a$FullString)
    COElkDraw3a$Orig_Quotaa <- "NA"
    COElkDraw3a$Orig_Quotaa[COElkDraw3a$Orig_Quota==TRUE] <- str_trim(gsub(pattern = "(.*Orig Quota)(.*)( Ttl Chce 1.*)",
                                                                           replacement = "\\2",
                                                                           x = COElkDraw3a$FullString))[COElkDraw3a$Orig_Quota==TRUE]
    COElkDraw3a$Orig_Quotaa[COElkDraw3a$Orig_Quota==TRUE] <- sub(" .*$", "", COElkDraw3a$Orig_Quotaa)[COElkDraw3a$Orig_Quota==TRUE]# deletes everything after first space
    
    COElkDraw3a$Chcs_Drawn <- FALSE
    COElkDraw3a$Chcs_Drawn <- grepl("Chcs Drawn",COElkDraw3a$FullString)
    COElkDraw3a$Chcs_Drawna <- "NA"
    COElkDraw3a$Chcs_Drawna[COElkDraw3a$Chcs_Drawn==TRUE] <- str_trim(gsub(pattern = "(.*Chcs Drawn)(.*)(|.*)",
                                                                           replacement = "\\2",
                                                                           x = COElkDraw3a$FullString))[COElkDraw3a$Chcs_Drawn==TRUE]
    COElkDraw3a$Chcs_Drawna[COElkDraw3a$Chcs_Drawn==TRUE] <- sub(" .*$", "", COElkDraw3a$Chcs_Drawna)[COElkDraw3a$Chcs_Drawn==TRUE]# deletes everything after first space
    
    
    # drop the columns not needed
    COElkDraw3b <- select(COElkDraw3a, -FullString, -HuntCode2,-Ttl_Chce_1,-Orig_Quota,-Chcs_Drawn)
    COElkDraw3b$Ttl_Chce_1 <- "NA"
    COElkDraw3b$Ttl_Chce_1[which(COElkDraw3b$HuntCode!=FALSE)] <- COElkDraw3b$Ttl_Chce_1a[which(COElkDraw3b$HuntCode!=FALSE)-1]
    COElkDraw3b$Orig_Quota <- "NA"
    COElkDraw3b$Orig_Quota[which(COElkDraw3b$HuntCode!=FALSE)] <- COElkDraw3b$Orig_Quotaa[which(COElkDraw3b$HuntCode!=FALSE)-1]
    COElkDraw3b$Chcs_Drawn <- "NA"
    COElkDraw3b$Chcs_Drawn[which(COElkDraw3b$HuntCode!=FALSE)] <- COElkDraw3b$Chcs_Drawna[which(COElkDraw3b$HuntCode!=FALSE)]
    
    COElkDraw3c <- select(COElkDraw3b, HuntCode, Orig_Quota, Ttl_Chce_1, Chcs_Drawn)
    COElkDraw3c <- filter(COElkDraw3c, HuntCode != FALSE)
    
    #fill down if multiple hunt codes for a Ttl Chce 1
    #TODO, do we copy? or divide in half??
    COElkDraw3c$Ttl_Chce_1[which(COElkDraw3c$Ttl_Chce_1=="NA")] <- COElkDraw3c$Ttl_Chce_1[which(COElkDraw3c$Ttl_Chce_1=="NA")-1]
    COElkDraw3c$Orig_Quota[which(COElkDraw3c$Orig_Quota=="NA")] <- COElkDraw3c$Orig_Quota[which(COElkDraw3c$Orig_Quota=="NA")-1]
    COElkDraw3c$Chcs_Drawn[which(COElkDraw3c$Chcs_Drawn=="NA")] <- COElkDraw3c$Chcs_Drawn[which(COElkDraw3c$Chcs_Drawn=="NA")-1]
    # TODO might have to do this again... in case there were more than 2
    
    COElkDraw4 <- COElkDraw3c
  }
  
  #DECODE HuntCode
  COElkDraw4$Animal <- substring(COElkDraw4$HuntCode, 1, 1)
  COElkDraw4$Sex <- substring(COElkDraw4$HuntCode, 2, 2)
  COElkDraw4$Unit <- substring(COElkDraw4$HuntCode, 3, 5)
  COElkDraw4$Season_Type <- substring(COElkDraw4$HuntCode, 6, 6)
  COElkDraw4$Season <- substring(COElkDraw4$HuntCode, 7, 7)
  COElkDraw4$Type <- substring(COElkDraw4$HuntCode, 8, 8)
  
  #Only looking at Elk right now
  COElkDraw4 <- filter(COElkDraw4, Animal == "E")
  COElkDraw4 <- select(COElkDraw4, -Animal)
  
  #Only looking at General hunting seasons right now
  COElkDraw4 <- filter(COElkDraw4, Season_Type == "O")
  COElkDraw4 <- select(COElkDraw4, -Season_Type)
  
  #sex
  COElkDraw4$Sex[COElkDraw4$Sex=="E"] <- "Either"
  COElkDraw4$Sex[COElkDraw4$Sex=="M"] <- "Bull"
  COElkDraw4$Sex[COElkDraw4$Sex=="F"] <- "Cow"
  
  # remove preceeding zeros from hunt units
  COElkDraw4$Unit <- as.character(as.numeric(COElkDraw4$Unit))
  
  #Only looking at Rifle hunting seasons right now
  COElkDraw4 <- filter(COElkDraw4, Type=="R")
  COElkDraw4 <- select(COElkDraw4, -Type)
  
  #Drop HuntCode
  COElkDraw4 <- select(COElkDraw4, -HuntCode)
  
  #Clean up field classes
  COElkDraw4$Orig_Quota <- as.numeric(COElkDraw4$Orig_Quota)
  COElkDraw4$Ttl_Chce_1 <- as.numeric(COElkDraw4$Ttl_Chce_1)
  COElkDraw4$Chcs_Drawn <- as.numeric(COElkDraw4$Chcs_Drawn)
  
  # Split select units into multiple (hunt codes were spread across units)
  # assume even spread. Only referencing the 2018 Big Game Brochure...
  # TODO - read Big Game Brochures for actual unit spreads
  # Units 77, 771, 78
  if (!any(COElkDraw4$Unit == "771" | COElkDraw4$Unit == "78")) {
    UnitSpread <- filter(COElkDraw4, Unit == "77")
    COElkDraw4 <- filter(COElkDraw4, Unit != "77")
    UnitSpread$Orig_Quota <- UnitSpread$Orig_Quota / 3
    UnitSpread$Ttl_Chce_1 <- UnitSpread$Ttl_Chce_1 / 3
    UnitSpread$Chcs_Drawn <- UnitSpread$Chcs_Drawn / 3
    UnitSpread1 <- UnitSpread
    UnitSpread2 <- UnitSpread
    UnitSpread1$Unit <- "771"
    UnitSpread2$Unit <- "78"
    COElkDraw4 <- bind_rows(COElkDraw4, UnitSpread,UnitSpread1,UnitSpread2)
  }
  
  # Units 64, 65
  if (!any(COElkDraw4$Unit == "65")) {
    UnitSpread <- filter(COElkDraw4, Unit == "64")
    COElkDraw4 <- filter(COElkDraw4, Unit != "64")
    UnitSpread$Orig_Quota <- UnitSpread$Orig_Quota / 2
    UnitSpread$Ttl_Chce_1 <- UnitSpread$Ttl_Chce_1 / 2
    UnitSpread$Chcs_Drawn <- UnitSpread$Chcs_Drawn / 2
    UnitSpread1 <- UnitSpread
    UnitSpread1$Unit <- "65"
    COElkDraw4 <- bind_rows(COElkDraw4, UnitSpread,UnitSpread1)
  }
  
  # Units 57, 58
  if (!any(COElkDraw4$Unit == "58")) {
    UnitSpread <- filter(COElkDraw4, Unit == "57")
    COElkDraw4 <- filter(COElkDraw4, Unit != "57")
    UnitSpread$Orig_Quota <- UnitSpread$Orig_Quota / 2
    UnitSpread$Ttl_Chce_1 <- UnitSpread$Ttl_Chce_1 / 2
    UnitSpread$Chcs_Drawn <- UnitSpread$Chcs_Drawn / 2
    UnitSpread1 <- UnitSpread
    UnitSpread1$Unit <- "58"
    COElkDraw4 <- bind_rows(COElkDraw4, UnitSpread,UnitSpread1)
  }
  
  # Units 7, 8
  if (!any(COElkDraw4$Unit == "8")) {
    UnitSpread <- filter(COElkDraw4, Unit == "7")
    COElkDraw4 <- filter(COElkDraw4, Unit != "7")
    UnitSpread$Orig_Quota <- UnitSpread$Orig_Quota / 2
    UnitSpread$Ttl_Chce_1 <- UnitSpread$Ttl_Chce_1 / 2
    UnitSpread$Chcs_Drawn <- UnitSpread$Chcs_Drawn / 2
    UnitSpread1 <- UnitSpread
    UnitSpread1$Unit <- "8"
    COElkDraw4 <- bind_rows(COElkDraw4, UnitSpread,UnitSpread1)
  }
  
  # Units 28, 37
  if (!any(COElkDraw4$Unit == "37")) {
    UnitSpread <- filter(COElkDraw4, Unit == "28")
    COElkDraw4 <- filter(COElkDraw4, Unit != "28")
    UnitSpread$Orig_Quota <- UnitSpread$Orig_Quota / 2
    UnitSpread$Ttl_Chce_1 <- UnitSpread$Ttl_Chce_1 / 2
    UnitSpread$Chcs_Drawn <- UnitSpread$Chcs_Drawn / 2
    UnitSpread1 <- UnitSpread
    UnitSpread1$Unit <- "37"
    COElkDraw4 <- bind_rows(COElkDraw4, UnitSpread,UnitSpread1)
  }
  
  # Units 69, 84
  if (!any(COElkDraw4$Unit == "84")) {
    UnitSpread <- filter(COElkDraw4, Unit == "69")
    COElkDraw4 <- filter(COElkDraw4, Unit != "69")
    UnitSpread$Orig_Quota <- UnitSpread$Orig_Quota / 2
    UnitSpread$Ttl_Chce_1 <- UnitSpread$Ttl_Chce_1 / 2
    UnitSpread$Chcs_Drawn <- UnitSpread$Chcs_Drawn / 2
    UnitSpread1 <- UnitSpread
    UnitSpread1$Unit <- "84"
    COElkDraw4 <- bind_rows(COElkDraw4, UnitSpread,UnitSpread1)
  }
  
  # Units 59, 581
  if (!any(COElkDraw4$Unit == "581")) {
    UnitSpread <- filter(COElkDraw4, Unit == "59")
    COElkDraw4 <- filter(COElkDraw4, Unit != "59")
    UnitSpread$Orig_Quota <- UnitSpread$Orig_Quota / 2
    UnitSpread$Ttl_Chce_1 <- UnitSpread$Ttl_Chce_1 / 2
    UnitSpread$Chcs_Drawn <- UnitSpread$Chcs_Drawn / 2
    UnitSpread1 <- UnitSpread
    UnitSpread1$Unit <- "581"
    COElkDraw4 <- bind_rows(COElkDraw4, UnitSpread,UnitSpread1)
  }
  
  # Units 86, 691, 861
  if (!any(COElkDraw4$Unit == "691" | COElkDraw4$Unit == "861")) {
    UnitSpread <- filter(COElkDraw4, Unit == "86")
    COElkDraw4 <- filter(COElkDraw4, Unit != "86")
    UnitSpread$Orig_Quota <- UnitSpread$Orig_Quota / 3
    UnitSpread$Ttl_Chce_1 <- UnitSpread$Ttl_Chce_1 / 3
    UnitSpread$Chcs_Drawn <- UnitSpread$Chcs_Drawn / 3
    UnitSpread1 <- UnitSpread
    UnitSpread2 <- UnitSpread
    UnitSpread1$Unit <- "691"
    UnitSpread2$Unit <- "861"
    COElkDraw4 <- bind_rows(COElkDraw4, UnitSpread,UnitSpread1,UnitSpread2)
  }
  
  #Combine
  COElkDraw4$Year <- as.character(iyear)
  COElkDrawAll <- bind_rows(COElkDrawAll,COElkDraw4)
  
}

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 85 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 85 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 85 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## Warning: Expected 26 pieces. Missing pieces filled with `NA` in 81 rows [1,
## 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

Calculate Draw Success Rate

COElkDrawAll$Draw_Success <- COElkDrawAll$Chcs_Drawn / COElkDrawAll$Ttl_Chce_1

Spread rows based on draw Sex

COElkDrawAll2 <- COElkDrawAll %>%
  gather(label,value,Draw_Success,Ttl_Chce_1,Orig_Quota,Chcs_Drawn) %>%
  unite(label1, Sex, label, sep = ".") %>% 
  spread(label1, value)

Peek at the dataframe

head(COElkDrawAll2)

##   Unit Season Year Bull.Chcs_Drawn Bull.Draw_Success Bull.Orig_Quota
## 1    1      1 2006              NA                NA              NA
## 2    1      1 2007              NA                NA              NA
## 3    1      1 2008              NA                NA              NA
## 4    1      1 2009              NA                NA              NA
## 5    1      1 2010              NA                NA              NA
## 6    1      1 2011              NA                NA              NA
##   Bull.Ttl_Chce_1 Cow.Chcs_Drawn Cow.Draw_Success Cow.Orig_Quota
## 1              NA             15         3.000000             20
## 2              NA             15         3.000000             15
## 3              NA             15         1.250000             15
## 4              NA             15         2.142857             15
## 5              NA             15         7.500000             15
## 6              NA             12         3.000000             15
##   Cow.Ttl_Chce_1 Either.Chcs_Drawn Either.Draw_Success Either.Orig_Quota
## 1              5                NA                  NA                NA
## 2              5                NA                  NA                NA
## 3             12                NA                  NA                NA
## 4              7                NA                  NA                NA
## 5              2                NA                  NA                NA
## 6              4                NA                  NA                NA
##   Either.Ttl_Chce_1
## 1                NA
## 2                NA
## 3                NA
## 4                NA
## 5                NA
## 6                NA

Download and Read PDF Draw Summaries from Colorado CPW

Pierre Sarnow

Tue Jun 19 07:08:06 2018

Description