# Load the main workbook (path is relative to this document's folder) and
# print it so the column names and types can be inspected.
data <- read_excel(path = "../00_data/MyData-Charts.xlsx")
data
## # A tibble: 1,222 × 11
## year months state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 2015 January-… Alab… 7000 7000 1800 26 2800 250 4
## 2 2015 January-… Ariz… 35000 35000 4600 13 3400 2100 6
## 3 2015 January-… Arka… 13000 14000 1500 11 1200 90 1
## 4 2015 January-… Cali… 1440000 1690000 255000 15 250000 124000 7
## 5 2015 January-… Colo… 3500 12500 1500 12 200 140 1
## 6 2015 January-… Conn… 3900 3900 870 22 290 NA NA
## 7 2015 January-… Flor… 305000 315000 42000 13 54000 25000 8
## 8 2015 January-… Geor… 104000 105000 14500 14 47000 9500 9
## 9 2015 January-… Hawa… 10500 10500 380 4 3400 760 7
## 10 2015 January-… Idaho 81000 88000 3700 4 2600 8000 9
## # … with 1,212 more rows, 1 more variable: `Growth of colonies` <dbl>, and
## # abbreviated variable names ¹colony_n, ²colony_max, ³colony_lost,
## # ⁴colony_lost_pct, ⁵colony_added, ⁶colony_reno, ⁷colony_reno_pct
# Load the second workbook; presumably a reduced version of the main data
# (not referenced by the code visible here -- confirm before removing).
data_small <- read_excel(path = "../00_data/Datasimpler.xlsx")
Make two bar charts here: one before ordering the factor levels and another after.
# Baseline chart: number of rows per `months` value, drawn as horizontal bars.
# `months` is a character column here, so the bars are not sorted by count.
ggplot(count(data, months), aes(x = n, y = months)) +
  geom_col()
# Ordered chart: same counts as the baseline chart, but with the `months`
# levels sorted by their count `n`.
# The reordering is done in mutate() rather than inside aes() so the axis
# title stays "months" instead of the raw expression "fct_reorder(months, n)".
data %>%
  count(months) %>%
  mutate(months = fct_reorder(months, n)) %>%
  ggplot(aes(x = n, y = months)) +
  geom_col()
# Vertical bar chart of row counts per `months` value.
# fct_infreq() orders the levels from most to least frequent; fct_rev() flips
# that, so the bars run from least to most frequent. geom_bar() does the
# counting itself, so no explicit count() step is needed.
data %>%
  mutate(months = fct_rev(fct_infreq(months))) %>%
  ggplot(aes(x = months)) +
  geom_bar()
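Show examples of three forcats functions: one that recodes factor levels, one that collapses them into groups, and one that lumps them into "Other":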
# Recode: replace each long quarter label with a short one ("new" = "old"),
# then count the rows per recoded level.
data %>%
  mutate(
    months = fct_recode(
      months,
      "Jan-Mar" = "January-March",
      "Apr-Jun" = "April-June",
      "Jul-Sep" = "July-September",
      "Oct-Dec" = "October-December"
    )
  ) %>%
  count(months)
## # A tibble: 4 × 2
## months n
## <fct> <int>
## 1 Apr-Jun 329
## 2 Jan-Mar 329
## 3 Jul-Sep 282
## 4 Oct-Dec 282
# Collapse: merge the listed states into two grouped levels and leave every
# other state as its own level, then count the rows per resulting level.
# NOTE(review): "West_coast" includes Arizona, Colorado, Idaho and Montana,
# which are not coastal states -- confirm the intended grouping/label.
data %>%
  mutate(
    state = fct_collapse(
      state,
      East_coast = c("Maine", "Connecticut", "Massachusetts"),
      West_coast = c("California", "Arizona", "Colorado", "Idaho", "Montana")
    )
  ) %>%
  count(state)
## # A tibble: 41 × 2
## state n
## <fct> <int>
## 1 Alabama 26
## 2 West_coast 130
## 3 Arkansas 26
## 4 East_coast 78
## 5 Florida 26
## 6 Georgia 26
## 7 Hawaii 26
## 8 Illinois 26
## 9 Indiana 26
## 10 Iowa 26
## # … with 31 more rows
# Lump: keep the most frequent `months` level and fold the rest into "Other",
# then count the rows per resulting level.
# fct_lump_n() is the explicit replacement for the legacy fct_lump(n = ...).
# Levels tied for the top count are all kept, so more than one level can
# survive even with n = 1.
data %>%
  mutate(months = fct_lump_n(months, n = 1)) %>%
  count(months)
## # A tibble: 3 × 2
## months n
## <fct> <int>
## 1 April-June 329
## 2 January-March 329
## 3 Other 564
No need to do anything here.