If all columns are included:
All should = TRUE
c(colnames(dealer_raw)) %in% c(fields$`CFDERS data`)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
should = 74 or length(fields$CFDERS data)
length(intersect(c(colnames(dealer_raw)),c(fields$`CFDERS data`)))
## [1] 74
should = character(0)
setdiff(c(colnames(dealer_raw)),c(fields$`CFDERS data`))
## character(0)
If temporally complete, N_UNIQUE_DAYS should = 365 or 366
dealer_raw %>%
distinct(DDATE, .keep_all = T) %>%
dplyr::group_by(YEAR) %>%
summarise(N_UNIQUE_DAYS=n()) %>%
formattable::formattable(align="l") %>%
as.htmlwidget(width = "35%")
Some PORT_STATEs have multiple numeric STATE codes associated with them, some STATE codes do not have a PORT_STATE associated with them
There are only 39917 cases from the non-GARFO states (GA, LA, TD and CN *whatever those are)
garfostates<-tibble(STATE=c("ME","NH","MA","RI","CT","NY","NJ","DE","VA","MD","NC"))
state_port_relationship<-dealer_raw %>%
distinct(STATE,PORT_STATE) %>%
arrange(STATE)
state_df<-sort(table(dealer_raw$STATE)) %>% data.frame()
colnames(state_df) <- c("STATE","STATE_landings")
port_state_df<-sort(table(dealer_raw$PORT_STATE)) %>% data.frame()
colnames(port_state_df) <- c("PORT_STATE","PORT_STATE_landings")
state_port_relationship %>%
left_join(state_df, by = "STATE") %>%
left_join(port_state_df, by = "PORT_STATE") %>%
formattable::formattable(align="l") %>%
as.htmlwidget(width = "70%")
state_port_relationship %>%
left_join(state_df, by = "STATE") %>%
left_join(port_state_df, by = "PORT_STATE") %>%
mutate(GARFO_STATE = ifelse((PORT_STATE %in% garfostates$STATE)==T,PORT_STATE,"OTHER STATES")) %>%
group_by(GARFO_STATE) %>%
summarise(PORT_STATE_landings = sum(PORT_STATE_landings,na.rm = T)) %>%
formattable::formattable(align="l") %>%
as.htmlwidget(width = "50%")
These tables/figures all use PORT_STATE as a grouping variable rather than STATE
state_month_df<-dealer_raw %>%
filter(PORT_STATE %in% garfostates$STATE) %>%
mutate(MONTH_NUM = lubridate::month(lubridate::as_date(DDATE))) %>%
mutate(PORT_STATE = ifelse(is.na(PORT_STATE)==T, NA, PORT_STATE)) %>%
group_by(PORT_STATE, MONTH_NUM) %>%
summarise(sum=sum(SPPLNDLB,na.rm=T),n=n())
All states have landings in each month of the year
state_month_df %>%
group_by(PORT_STATE) %>%
summarise(MONTHS_WITH_LANDINGS=n()) %>%
formattable::formattable(align="l") %>%
as.htmlwidget(width = "45%")
state_month_df %>%
ggplot(aes(x=MONTH_NUM,y=n)) + geom_point(size=1.25) + geom_line() + facet_wrap(~PORT_STATE, scales = "free_y") +
scale_x_continuous(breaks=c(2,5,8,11)) + labs(x="Month", y="Landings (n) \n") +
theme(panel.grid = element_blank(), strip.background = element_blank(),strip.text = element_text(color="black",size=12), axis.text = element_text(size=11), axis.title = element_text(size=14))
state_month_df %>%
ggplot(aes(x=MONTH_NUM,y=sum)) + geom_area() + facet_wrap(~PORT_STATE) +
scale_x_continuous(breaks=c(2,5,8,11)) + labs(x="Month", y="Landed lbs \n") +
theme(panel.grid = element_blank(), strip.background = element_blank(),strip.text = element_text(color="black",size=12), axis.text = element_text(size=11), axis.title = element_text(size=14))
dealer_raw %>%
distinct(YEAR,PORT_STATE,SPP_COMMON_NAME) %>%
group_by(YEAR,PORT_STATE) %>%
summarise(nSPECIES=n()) %>%
filter(PORT_STATE %in% garfostates$STATE) %>%
ggplot(aes(x=YEAR,y=nSPECIES)) + geom_bar(stat = "identity") + facet_wrap(~PORT_STATE) +
scale_x_continuous(breaks=c(2016,2018,2020)) + labs(x="Year", y="Species (n) \n") +
theme(panel.grid = element_blank(), strip.background = element_blank(),strip.text = element_text(color="black",size=12), axis.text = element_text(size=11), axis.title = element_text(size=14))
Compare columns:
c(colnames(dealer_raw)) %in% c(colnames(cfders16))
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
c(colnames(dealer_raw)) %in% c(colnames(cfders17))
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
Output is difference between number of unique YEAR_LINK fields in new vs old data
2016
dealer16<-dealer_raw %>%
filter(YEAR == 2016)
dealer17<-dealer_raw %>%
filter(YEAR == 2017)
length(unique(dealer16$YEAR_LINK)) - length(unique(cfders16$YEAR_LINK))
## [1] 449196
2017, note may not be complete year record
length(unique(dealer17$YEAR_LINK)) - length(unique(cfders17$YEAR_LINK))
## [1] 923469
int<-dealer16 %>%
group_by(PORT_STATE) %>%
summarise(NEW=n())
cf16<-cfders16 %>%
group_by(PORT_STATE) %>%
summarise(OLD=n()) %>%
full_join(int, by = "PORT_STATE") %>%
mutate(DIFF = NEW - OLD) %>%
formattable::formattable(align="l") %>%
as.htmlwidget(width = "70%")
cf16
#export_formattable(cf16,"CFDERS_year_state_16.jpg")
int<-dealer17 %>%
group_by(PORT_STATE) %>%
summarise(NEW=n())
cf17<-cfders17 %>%
group_by(PORT_STATE) %>%
summarise(OLD=n()) %>%
full_join(int, by = "PORT_STATE") %>%
mutate(DIFF = NEW - OLD) %>%
formattable::formattable(align="l") %>%
as.htmlwidget(width = "70%")
cf17
#export_formattable(cf17,"CFDERS_year_state_17.jpg")