Import your data
mydata <- read_excel("../00_data/mydata.xlsx")
Separating and Uniting
Separate a column
mydata_sep <- mydata %>%
separate(col = eventDate, into = c("year", "month", "day"), sep = "-")
Unite two columns
mydata_sep %>%
unite(col = "new_date", day, month, year, sep = "/") %>%
select(new_date, scientificName)
## # A tibble: 136,621 × 2
## new_date scientificName
## <chr> <chr>
## 1 01/01/2023 Philoria loveridgei
## 2 02/01/2023 Heleioporus australiacus
## 3 02/01/2023 Mixophyes iteratus
## 4 02/01/2023 Mixophyes fasciolatus
## 5 02/01/2023 Litoria latopalmata
## 6 04/01/2023 Assa darlingtoni
## 7 04/01/2023 Assa darlingtoni
## 8 06/01/2023 Litoria nasuta
## 9 06/01/2023 Mixophyes iteratus
## 10 06/01/2023 Litoria gracilenta
## # ℹ 136,611 more rows
Missing Values
mydata %>%
count(scientificName, stateProvince) %>%
pivot_wider(names_from = stateProvince, values_from = n)
## # A tibble: 186 × 10
## scientificName `New South Wales` Queensland `Western Australia`
## <chr> <int> <int> <int>
## 1 Adelotus brevis 935 1173 NA
## 2 Anstisia lutea NA NA 5
## 3 Anstisia rosea NA NA 11
## 4 Assa darlingtoni 32 133 NA
## 5 Austrochaperina adelphe NA NA NA
## 6 Austrochaperina fryi NA 27 NA
## 7 Austrochaperina gracilipes NA 31 NA
## 8 Austrochaperina pluvialis NA 169 NA
## 9 Austrochaperina robusta NA 29 NA
## 10 Cophixalus aenigma NA 5 NA
## # ℹ 176 more rows
## # ℹ 6 more variables: `Northern Territory` <int>, Tasmania <int>,
## # `Australian Capital Territory` <int>, `South Australia` <int>,
## # Victoria <int>, `Other Territories` <int>
mydata %>%
count(scientificName, stateProvince) %>%
complete(scientificName, stateProvince, fill = list(n = 0))
## # A tibble: 1,674 × 3
## scientificName stateProvince n
## <chr> <chr> <int>
## 1 Adelotus brevis Australian Capital Territory 0
## 2 Adelotus brevis New South Wales 935
## 3 Adelotus brevis Northern Territory 0
## 4 Adelotus brevis Other Territories 0
## 5 Adelotus brevis Queensland 1173
## 6 Adelotus brevis South Australia 0
## 7 Adelotus brevis Tasmania 0
## 8 Adelotus brevis Victoria 0
## 9 Adelotus brevis Western Australia 0
## 10 Anstisia lutea Australian Capital Territory 0
## # ℹ 1,664 more rows
mydata %>%
fill(recordedBy, .direction = "down")
## # A tibble: 136,621 × 11
## occurrenceID eventID decimalLatitude decimalLongitude scientificName
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 12832 525618 -28.5 153. Philoria loveridgei
## 2 12833 526341 -33.7 151. Heleioporus australiac…
## 3 12834 526673 -28.7 153. Mixophyes iteratus
## 4 12835 526673 -28.7 153. Mixophyes fasciolatus
## 5 12836 526673 -28.7 153. Litoria latopalmata
## 6 12837 527056 -30.4 153. Assa darlingtoni
## 7 12838 527058 -30.4 153. Assa darlingtoni
## 8 12839 528103 -30.4 153 Litoria nasuta
## 9 12840 528103 -30.4 153 Mixophyes iteratus
## 10 12841 528103 -30.4 153 Litoria gracilenta
## # ℹ 136,611 more rows
## # ℹ 6 more variables: eventDate <dttm>, eventTime <dttm>, timezone <chr>,
## # coordinateUncertaintyInMeters <dbl>, recordedBy <dbl>, stateProvince <chr>