Recreating fire chart

### RECREATING PLOTS
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load calfire_frap.csv from the BuzzFeedNews 2018-07-wildfire-trends
# repository and add plot_date: each fire's alarm_date moved onto one common
# placeholder year, so fires from every year share a single January-December
# x axis.
# The placeholder is 2016 because it is a leap year. With a non-leap year such
# as 2017, a fire that started on 29 February formats to "2017-02-29", which
# as.Date() converts to NA, so that fire silently drops out of every plot.
wildfires <- readr::read_csv("https://raw.githubusercontent.com/BuzzFeedNews/2018-07-wildfire-trends/master/data/calfire_frap.csv") %>%
  mutate(plot_date = as.Date(format(alarm_date, "2016-%m-%d")))
## Rows: 14847 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): state, agency, unit_id, fire_name, inc_num, comments, fire_num
## dbl  (9): objectid, year_, cause, report_ac, gis_acres, c_method, objective,...
## date (2): alarm_date, cont_date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the column names and types of the loaded data, including the derived
# plot_date column.
str(wildfires)
## spec_tbl_df [14,847 × 19] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ objectid    : num [1:14847] 1 2 3 4 5 6 7 8 9 10 ...
##  $ year_       : num [1:14847] 2007 2007 2007 2007 2007 ...
##  $ state       : chr [1:14847] "CA" "CA" "CA" "CA" ...
##  $ agency      : chr [1:14847] "CCO" "CCO" "USF" "CCO" ...
##  $ unit_id     : chr [1:14847] "LAC" "LAC" "ANF" "LAC" ...
##  $ fire_name   : chr [1:14847] "OCTOBER" "MAGIC" "RANCH" "EMMA" ...
##  $ inc_num     : chr [1:14847] "246393" "233077" "166" "201384" ...
##  $ alarm_date  : Date[1:14847], format: "2007-10-21" "2007-10-22" ...
##  $ cont_date   : Date[1:14847], format: "2007-10-23" "2007-10-25" ...
##  $ cause       : num [1:14847] 14 14 2 14 14 14 14 14 14 14 ...
##  $ comments    : chr [1:14847] NA NA NA NA ...
##  $ report_ac   : num [1:14847] NA NA 54716 NA NA ...
##  $ gis_acres   : num [1:14847] 25.7 2824.9 58410.3 172.2 4708 ...
##  $ c_method    : num [1:14847] 8 8 7 8 8 8 8 8 8 8 ...
##  $ objective   : num [1:14847] 1 1 1 1 1 1 1 1 1 1 ...
##  $ fire_num    : chr [1:14847] "233414" "233077" "166" "201384" ...
##  $ shape_length: num [1:14847] 1902 20408 169151 6118 22907 ...
##  $ shape_area  : num [1:14847] 1.04e+05 1.14e+07 2.36e+08 6.97e+05 1.91e+07 ...
##  $ plot_date   : Date[1:14847], format: "2017-10-21" "2017-10-22" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   objectid = col_double(),
##   ..   year_ = col_double(),
##   ..   state = col_character(),
##   ..   agency = col_character(),
##   ..   unit_id = col_character(),
##   ..   fire_name = col_character(),
##   ..   inc_num = col_character(),
##   ..   alarm_date = col_date(format = ""),
##   ..   cont_date = col_date(format = ""),
##   ..   cause = col_double(),
##   ..   comments = col_character(),
##   ..   report_ac = col_double(),
##   ..   gis_acres = col_double(),
##   ..   c_method = col_double(),
##   ..   objective = col_double(),
##   ..   fire_num = col_character(),
##   ..   shape_length = col_double(),
##   ..   shape_area = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
#wildfiremonth<- wildfires%>%
  #mutate(plot_month= month(plot_date))
  #mutate(DayMonth = format(as.Date(plot_date), "%m-%d"))

library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
#str(wildfiremonth)

# First draft of the chart: one jittered point per fire, placed by day of year
# (x) and year (y, reversed so later years sit lower), sized by gis_acres.
ggplot(wildfires, aes(x = plot_date, y = year_, size = gis_acres)) +
  geom_jitter(alpha = 0.6, color = "darkorange") +
  scale_y_reverse() +
  ggtitle("Big fires have gotten more common.") +
  theme(
    panel.background = element_rect(fill = "black"),
    panel.grid = element_blank()
  ) +
  # plot_date carries a placeholder year, so label the monthly breaks with the
  # abbreviated month name only instead of the default full date.
  scale_x_date(date_breaks = "1 month", date_labels = "%b")
## Warning: Removed 1623 rows containing missing values (geom_point).

  #theme(axis.text.x=element_text())
# Styled version of the chart: one point per fire, x = day of year, y = year
# (reversed so later years sit lower), point size scaled by shape_area, drawn
# in orange on a black background with horizontal grid lines only.
ggplot(wildfires, aes(x = plot_date, y = year_, size = shape_area)) +
  geom_point(alpha = 0.4, color = "orange") +
  # Set the breaks themselves rather than pairing hard-coded labels with
  # automatically chosen breaks: the labels are then always the true break
  # values, and the scale cannot fail on a label/break count mismatch.
  scale_y_reverse(breaks = c(1950, 1970, 1990, 2010)) +
  ggtitle("Big fires have gotten more common.") +
  theme(
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.grid = element_blank(),
    # Re-enable only the horizontal grid lines after blanking the grid above.
    panel.grid.major.y = element_line(color = "grey"),
    panel.grid.minor.y = element_line(color = "grey"),
    axis.title = element_blank(),
    axis.text = element_text(color = "lightgrey", face = "bold"),
    legend.position = "none",
    title = element_text(color = "lightgrey")
  ) +
  # date_labels takes a strftime() format string. "%b" derives each label from
  # the break's own date, instead of relying on a fixed list of month names
  # lining up with whichever break happens to come first. Month names follow
  # the session locale.
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  scale_size(range = c(0.1, 9))
## Warning: Removed 1617 rows containing missing values (geom_point).

Variables of interest: date/year, fire size

# Print large numbers in full rather than in scientific notation.
options(scipen = 999)

# Bar chart of the number of recorded fires in each year.
ggplot(data = wildfires) +
  geom_bar(mapping = aes(x = year_))

# Mean fire footprint (shape_area); used below as the cut-off for bigfires.
# na.rm = TRUE stops a single missing value from turning the result into NA,
# which would make the later `shape_area > meanarea` filter return no rows.
meanarea <- mean(wildfires$shape_area, na.rm = TRUE)
meanarea
## [1] 7001315
# Median fire footprint (shape_area), printed alongside the mean.
# na.rm = TRUE stops a single missing value from turning the result into NA.
medianarea <- median(wildfires$shape_area, na.rm = TRUE)
medianarea
## [1] 576482.2
# Keep only the fires whose shape_area is above the overall mean.
bigfires <- dplyr::filter(wildfires, shape_area > meanarea)

# Count of above-average fires per year.
# year_ holds whole years, so use one bin per year. The default of 30 bins
# spreads the years unevenly across bins (some bins cover more years than
# others), which distorts the bar heights.
ggplot(bigfires, aes(x = year_)) +
  geom_histogram(binwidth = 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Per-year summary: number of fires (n) and their combined shape_area
# (totalarea).
fire2 <- summarise(
  group_by(wildfires, year_),
  n = n(),
  totalarea = sum(shape_area)
)

# Fires per year as a line, with each year's point sized and coloured by the
# total area for that year.
ggplot(data = fire2, mapping = aes(x = year_, y = n)) +
  geom_point(mapping = aes(size = totalarea, colour = totalarea)) +
  geom_line() +
  xlab("Year") +
  ylab("Number of fires")