library("htmltools")
## Warning: package 'htmltools' was built under R version 3.6.1
library("webshot")    
## Warning: package 'webshot' was built under R version 3.6.1
#webshot::install_phantomjs()

export_formattable <- function(f, file, width = "100%", height = NULL, 
                               background = "white", delay = 0.2)
    {
      w <- as.htmlwidget(f, width = width, height = height)
      path <- html_print(w, background = background, viewer = NULL)
      url <- paste0("file:///", gsub("\\\\", "/", normalizePath(path)))
      webshot(url,
              file = file,
              selector = ".formattable_widget",
              delay = delay)
    }

Check to ensure all columns were included

If all columns are included:
All should = TRUE

c(colnames(vtr_dump)) %in% c(fields$`VTR/Permit`)
##  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
## [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [25]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [37]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [49]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
## [61]  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE


should = 70 or length(fields$VTR/Permit)

length(intersect(c(colnames(vtr_dump)),c(fields$`VTR/Permit`)))
## [1] 65


columns that were included in requested fields, but not included in data dump:

setdiff(c(fields$`VTR/Permit`),c(colnames(vtr_dump)))
## [1] "STATE_CODE"      "GEN_CAT_SCALLOP" "HMS_SQUID"       "QUAHOG"         
## [5] "SEA_SCALLOP"     NA


columns that are included in data dump but different than or not in requested fields:

setdiff(c(colnames(vtr_dump)),c(fields$`VTR/Permit`))
## [1] "PORT_STATE"     "TRIPTYPE"       "HMS"            "OCEAN_QUAHOG"  
## [5] "SCALLOP_GENCAT" "SCALLOP_LA"


Compare to previous data pull

Check STATE vs PORT_STATE match-up

vtr_dump %>% 
  group_by(STATE,PORT_STATE) %>% 
  summarise(RECORDS=n()) %>% 
  arrange(STATE) %>% 
  formattable::formattable(align="l") %>% 
  as.htmlwidget(width = "60%")

Number of records by state

garfostates<-tibble(STATE=c("ME","NH","MA","RI","CT","NY","NJ","DE","VA","MD","NC"))

vtr_dump %>% 
  filter(STATE %in% garfostates$STATE) %>% 
  group_by(STATE) %>% 
  summarise(RECORDS = n()) %>% 
  formattable::formattable(align="l") %>% 
  as.htmlwidget(width = "50%")

Compare old 14-15 VTR data to new VTR dump

c(colnames(vtr_dump)) %in% c(colnames(vtr1415))
##  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
## [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [25]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [37]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [49]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
## [61]  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE

columns that were included in requested fields, but not included in data dump:

setdiff(c(colnames(vtr1415)),c(colnames(vtr_dump)))
## [1] "STATE_CODE"      "GEN_CAT_SCALLOP" "HMS_SQUID"       "QUAHOG"         
## [5] "SEA_SCALLOP"


columns that are included in data dump but different than or not in requested fields:

setdiff(c(colnames(vtr_dump)),c(colnames(vtr1415)))
## [1] "PORT_STATE"     "TRIPTYPE"       "HMS"            "OCEAN_QUAHOG"  
## [5] "SCALLOP_GENCAT" "SCALLOP_LA"

note: annoying differences in the way the date is formatted now vs previously now: 21-MAY-2014 then: 5/21/2014

Output is difference between number records in new vs old data (2014 and 2015 respectively)

dump1415<-vtr_dump %>% 
  filter(VTR_YEAR <= 2015) 

new<-dump1415 %>% 
  group_by(VTR_YEAR, STATE) %>% 
  summarise(NEW=n())

original<-vtr1415 %>% 
  rename("PORT_STATE" = "STATE_CODE") %>% 
  group_by(VTR_YEAR,STATE) %>% 
  summarise(ORIGINAL=n())


new14<-dump1415 %>% 
  filter(VTR_YEAR == "2014")

new15<-dump1415 %>% 
  filter(VTR_YEAR == "2015")

old14<-vtr1415 %>% 
  filter(VTR_YEAR == "2014")

old15<-vtr1415 %>% 
  filter(VTR_YEAR == "2015")

length(new14$PERMIT) - length(old14$PERMIT)
## [1] 1164
length(new15$PERMIT) - length(old15$PERMIT)
## [1] 2938

Compare 14-15 VTR data to VTR dump

Difference between total number of records by state

vtr_years<-original %>% 
  left_join(new, by = c("VTR_YEAR","STATE")) %>% 
  mutate(DIFF = NEW - ORIGINAL)  %>% 
  arrange(STATE,VTR_YEAR) %>% 
  formattable::formattable(align="l") %>% 
  as.htmlwidget(width = "100%")

vtr_years
#export_formattable(vtr_years,"VTR_year_state_comparison.jpg")

Difference between total number of trips by state

new_trips<-dump1415 %>% 
  distinct(VTR_YEAR, STATE, PERMIT, TRIP_ID, SUB_TRIP_ID) %>% 
  group_by(VTR_YEAR, STATE) %>% 
  summarise(NEW_TRIPS=n())

original_trips<-vtr1415 %>% 
  rename("PORT_STATE" = "STATE_CODE") %>% 
  distinct(VTR_YEAR, STATE, PERMIT, TRIP_ID, SUB_TRIP_ID) %>% 
  group_by(VTR_YEAR,STATE) %>% 
  summarise(ORIGINAL_TRIPS=n())


original_trips %>% 
  left_join(new_trips, by = c("VTR_YEAR","STATE")) %>% 
  mutate(DIFF = NEW_TRIPS - ORIGINAL_TRIPS) %>% 
  arrange(STATE,VTR_YEAR) %>% 
  formattable::formattable(align="l") %>% 
  as.htmlwidget(width = "80%")

Check that there are not latent permits

If there are, tables will have > 0 rows

dump1415 %>% 
  filter(is.na(KEPT) & is.na(DISCARDED)) %>% 
  count()
## # A tibble: 1 x 1
##       n
##   <int>
## 1     0
vtr1415 %>% 
  filter(is.na(KEPT) & is.na(DISCARDED))  %>% 
  count()
## # A tibble: 1 x 1
##       n
##   <int>
## 1     0
dump1415 %>% 
  filter(is.na(TRIP_ID)) %>% 
  count()
## # A tibble: 1 x 1
##       n
##   <int>
## 1     0
vtr1415 %>% 
  filter(is.na(TRIP_ID)) %>% 
  count()
## # A tibble: 1 x 1
##       n
##   <int>
## 1     0