# Read the workbook into a tibble.
mydata <- read_excel("../00_data/mydata.xlsx")

# Show the data with stateProvince converted to a factor. The result is
# printed only; it is not assigned back, so `mydata` is left unchanged.
mutate(mydata, stateProvince = as_factor(stateProvince))
## # A tibble: 136,621 × 11
## occurrenceID eventID decimalLatitude decimalLongitude scientificName
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 12832 525618 -28.5 153. Philoria loveridgei
## 2 12833 526341 -33.7 151. Heleioporus australiac…
## 3 12834 526673 -28.7 153. Mixophyes iteratus
## 4 12835 526673 -28.7 153. Mixophyes fasciolatus
## 5 12836 526673 -28.7 153. Litoria latopalmata
## 6 12837 527056 -30.4 153. Assa darlingtoni
## 7 12838 527058 -30.4 153. Assa darlingtoni
## 8 12839 528103 -30.4 153 Litoria nasuta
## 9 12840 528103 -30.4 153 Mixophyes iteratus
## 10 12841 528103 -30.4 153 Litoria gracilenta
## # ℹ 136,611 more rows
## # ℹ 6 more variables: eventDate <dttm>, eventTime <dttm>, timezone <chr>,
## # coordinateUncertaintyInMeters <dbl>, recordedBy <dbl>, stateProvince <fct>
Make two bar charts here: one before ordering the factor levels and another after.
# Bar chart of row counts per state, using the column's default ordering.
ggplot(mydata, aes(y = stateProvince)) +
  geom_bar() +
  labs(
    title = "Before Ordering (Alphabetical)",
    y = "State"
  )
# Same chart, with the states ordered by how often they occur (fct_infreq).
ggplot(mydata, aes(y = fct_infreq(stateProvince))) +
  geom_bar() +
  labs(
    title = "After Ordering (By Frequency)",
    y = "State"
  )
Show examples of three functions:
# fct_recode(): rename individual levels by hand, written as new = old.
mydata %>%
  mutate(
    state_rev = fct_recode(
      stateProvince,
      NSW = "New South Wales",
      QLD = "Queensland"
    )
  ) %>%
  select(stateProvince, state_rev)
## # A tibble: 136,621 × 2
## stateProvince state_rev
## <chr> <fct>
## 1 New South Wales NSW
## 2 New South Wales NSW
## 3 New South Wales NSW
## 4 New South Wales NSW
## 5 New South Wales NSW
## 6 New South Wales NSW
## 7 New South Wales NSW
## 8 New South Wales NSW
## 9 New South Wales NSW
## 10 New South Wales NSW
## # ℹ 136,611 more rows
# fct_collapse(): merge several existing levels into one new level.
mydata %>%
  mutate(
    state_col = fct_collapse(
      stateProvince,
      "East Coast" = c("New South Wales", "Queensland", "Victoria")
    )
  ) %>%
  select(stateProvince, state_col)
## # A tibble: 136,621 × 2
## stateProvince state_col
## <chr> <fct>
## 1 New South Wales East Coast
## 2 New South Wales East Coast
## 3 New South Wales East Coast
## 4 New South Wales East Coast
## 5 New South Wales East Coast
## 6 New South Wales East Coast
## 7 New South Wales East Coast
## 8 New South Wales East Coast
## 9 New South Wales East Coast
## 10 New South Wales East Coast
## # ℹ 136,611 more rows
# fct_lump_n(): keep the 3 most frequent states and pool the rest as "Other".
# fct_lump() is kept in forcats mainly for historical reasons; the explicit
# fct_lump_n() variant is the recommended replacement and lumps identically
# when `n` is supplied.
mydata %>%
  mutate(state_lump = fct_lump_n(stateProvince, n = 3)) %>%
  count(state_lump)
## # A tibble: 4 × 2
## state_lump n
## <fct> <int>
## 1 New South Wales 58749
## 2 Queensland 23334
## 3 Victoria 32383
## 4 Other 22155
No need to do anything here.