Check to ensure all columns were included

If all columns are included:
All should = TRUE

c(colnames(dealer_raw)) %in% c(fields$`CFDERS data`)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE


should = 74 or length(fields$CFDERS data)

length(intersect(c(colnames(dealer_raw)),c(fields$`CFDERS data`)))
## [1] 74


should = character(0)

setdiff(c(colnames(dealer_raw)),c(fields$`CFDERS data`))
## character(0)

Check to ensure temporal completeness

If temporally complete, N_UNIQUE_DAYS should = 365 or 366

dealer_raw %>%
  distinct(DDATE, .keep_all = T) %>% 
  dplyr::group_by(YEAR) %>% 
  summarise(N_UNIQUE_DAYS=n()) %>% 
  formattable::formattable(align="l") %>% 
  as.htmlwidget(width = "35%")

Check STATE/PORT_STATE discrepancies

Some PORT_STATEs have multiple numeric STATE codes associated with them, some STATE codes do not have a PORT_STATE associated with them
There are only 39917 cases from the non-GARFO states (GA, LA, TD and CN *whatever those are)

garfostates<-tibble(STATE=c("ME","NH","MA","RI","CT","NY","NJ","DE","VA","MD","NC"))

state_port_relationship<-dealer_raw %>% 
  distinct(STATE,PORT_STATE) %>% 
  arrange(STATE)

state_df<-sort(table(dealer_raw$STATE)) %>% data.frame()
colnames(state_df) <- c("STATE","STATE_landings")


port_state_df<-sort(table(dealer_raw$PORT_STATE)) %>% data.frame()
colnames(port_state_df) <- c("PORT_STATE","PORT_STATE_landings")

STATE vs PORT_STATE

state_port_relationship %>% 
  left_join(state_df, by = "STATE") %>% 
  left_join(port_state_df, by = "PORT_STATE") %>% 
  formattable::formattable(align="l") %>% 
  as.htmlwidget(width = "70%")

GARFO states only summed by PORT_STATE

state_port_relationship %>% 
  left_join(state_df, by = "STATE") %>% 
  left_join(port_state_df, by = "PORT_STATE") %>% 
  mutate(GARFO_STATE = ifelse((PORT_STATE %in% garfostates$STATE)==T,PORT_STATE,"OTHER STATES")) %>% 
  group_by(GARFO_STATE) %>% 
  summarise(PORT_STATE_landings = sum(PORT_STATE_landings,na.rm = T)) %>% 
  formattable::formattable(align="l") %>% 
  as.htmlwidget(width = "50%")



Check monthly landings by state

These tables/figures all use PORT_STATE as a grouping variable rather than STATE

state_month_df<-dealer_raw %>% 
  filter(PORT_STATE %in% garfostates$STATE) %>% 
  mutate(MONTH_NUM = lubridate::month(lubridate::as_date(DDATE))) %>% 
  mutate(PORT_STATE = ifelse(is.na(PORT_STATE)==T, NA, PORT_STATE)) %>% 
  group_by(PORT_STATE, MONTH_NUM) %>% 
  summarise(sum=sum(SPPLNDLB,na.rm=T),n=n())


All states have landings in each month of the year

state_month_df %>% 
  group_by(PORT_STATE) %>% 
  summarise(MONTHS_WITH_LANDINGS=n()) %>% 
  formattable::formattable(align="l") %>% 
  as.htmlwidget(width = "45%")

Monthly number of landings by state

state_month_df %>% 
  ggplot(aes(x=MONTH_NUM,y=n)) + geom_point(size=1.25) + geom_line() + facet_wrap(~PORT_STATE, scales = "free_y") +
  scale_x_continuous(breaks=c(2,5,8,11)) + labs(x="Month", y="Landings (n) \n") +
  theme(panel.grid = element_blank(), strip.background = element_blank(),strip.text = element_text(color="black",size=12), axis.text = element_text(size=11), axis.title = element_text(size=14)) 

Monthly lbs landed by state

state_month_df %>% 
  ggplot(aes(x=MONTH_NUM,y=sum)) + geom_area() + facet_wrap(~PORT_STATE) +
  scale_x_continuous(breaks=c(2,5,8,11)) + labs(x="Month", y="Landed lbs \n") +
  theme(panel.grid = element_blank(), strip.background = element_blank(),strip.text = element_text(color="black",size=12), axis.text = element_text(size=11), axis.title = element_text(size=14)) 

Unique species by state

dealer_raw %>%
  distinct(YEAR,PORT_STATE,SPP_COMMON_NAME) %>% 
  group_by(YEAR,PORT_STATE) %>% 
  summarise(nSPECIES=n()) %>% 
  filter(PORT_STATE %in% garfostates$STATE) %>% 
  ggplot(aes(x=YEAR,y=nSPECIES)) + geom_bar(stat = "identity") + facet_wrap(~PORT_STATE) +
  scale_x_continuous(breaks=c(2016,2018,2020)) + labs(x="Year", y="Species (n) \n") +
  theme(panel.grid = element_blank(), strip.background = element_blank(),strip.text = element_text(color="black",size=12), axis.text = element_text(size=11), axis.title = element_text(size=14)) 



Compare to previous data pull

Compare columns:

c(colnames(dealer_raw)) %in% c(colnames(cfders16))
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
c(colnames(dealer_raw)) %in% c(colnames(cfders17))
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE


Output is difference between number of unique YEAR_LINK fields in new vs old data
2016

dealer16<-dealer_raw %>% 
  filter(YEAR == 2016)

dealer17<-dealer_raw %>% 
  filter(YEAR == 2017)

length(unique(dealer16$YEAR_LINK)) - length(unique(cfders16$YEAR_LINK))
## [1] 449196


2017, note may not be complete year record

length(unique(dealer17$YEAR_LINK)) - length(unique(cfders17$YEAR_LINK))
## [1] 923469

2016: Difference between total number of records by state

int<-dealer16 %>% 
  group_by(PORT_STATE) %>% 
  summarise(NEW=n())

cf16<-cfders16 %>% 
  group_by(PORT_STATE) %>% 
  summarise(OLD=n()) %>% 
  full_join(int, by = "PORT_STATE") %>% 
  mutate(DIFF = NEW - OLD) %>% 
  formattable::formattable(align="l") %>% 
  as.htmlwidget(width = "70%")
cf16
#export_formattable(cf16,"CFDERS_year_state_16.jpg")

2017: Difference between total number of records by state

int<-dealer17 %>% 
  group_by(PORT_STATE) %>% 
  summarise(NEW=n())

cf17<-cfders17 %>% 
  group_by(PORT_STATE) %>% 
  summarise(OLD=n()) %>% 
  full_join(int, by = "PORT_STATE") %>% 
  mutate(DIFF = NEW - OLD) %>% 
  formattable::formattable(align="l") %>% 
  as.htmlwidget(width = "70%")
cf17
#export_formattable(cf17,"CFDERS_year_state_17.jpg")