Import your data

mydata <- read_excel("../00_data/mydata.xlsx")

Pivoting

long to wide form

frog_summary <- mydata %>%
  count(scientificName, stateProvince)


frog_wide <- frog_summary %>%
  pivot_wider(names_from = stateProvince, values_from = n)

wide to long form

frog_wide %>%
  pivot_longer(cols = -scientificName, names_to = "state", values_to = "count")
## # A tibble: 1,674 × 3
##    scientificName  state                        count
##    <chr>           <chr>                        <int>
##  1 Adelotus brevis New South Wales                935
##  2 Adelotus brevis Queensland                    1173
##  3 Adelotus brevis Western Australia               NA
##  4 Adelotus brevis Northern Territory              NA
##  5 Adelotus brevis Tasmania                        NA
##  6 Adelotus brevis Australian Capital Territory    NA
##  7 Adelotus brevis South Australia                 NA
##  8 Adelotus brevis Victoria                        NA
##  9 Adelotus brevis Other Territories               NA
## 10 Anstisia lutea  New South Wales                 NA
## # ℹ 1,664 more rows

Separating and Uniting

Separate a column

mydata_sep <- mydata %>%
  separate(col = eventDate, into = c("year", "month", "day"), sep = "-")

Unite two columns

mydata_sep %>%
  unite(col = "new_date", day, month, year, sep = "/") %>%
  select(new_date, scientificName)
## # A tibble: 136,621 × 2
##    new_date   scientificName          
##    <chr>      <chr>                   
##  1 01/01/2023 Philoria loveridgei     
##  2 02/01/2023 Heleioporus australiacus
##  3 02/01/2023 Mixophyes iteratus      
##  4 02/01/2023 Mixophyes fasciolatus   
##  5 02/01/2023 Litoria latopalmata     
##  6 04/01/2023 Assa darlingtoni        
##  7 04/01/2023 Assa darlingtoni        
##  8 06/01/2023 Litoria nasuta          
##  9 06/01/2023 Mixophyes iteratus      
## 10 06/01/2023 Litoria gracilenta      
## # ℹ 136,611 more rows

Missing Values

mydata %>%
  count(scientificName, stateProvince) %>%
  pivot_wider(names_from = stateProvince, values_from = n)
## # A tibble: 186 × 10
##    scientificName             `New South Wales` Queensland `Western Australia`
##    <chr>                                  <int>      <int>               <int>
##  1 Adelotus brevis                          935       1173                  NA
##  2 Anstisia lutea                            NA         NA                   5
##  3 Anstisia rosea                            NA         NA                  11
##  4 Assa darlingtoni                          32        133                  NA
##  5 Austrochaperina adelphe                   NA         NA                  NA
##  6 Austrochaperina fryi                      NA         27                  NA
##  7 Austrochaperina gracilipes                NA         31                  NA
##  8 Austrochaperina pluvialis                 NA        169                  NA
##  9 Austrochaperina robusta                   NA         29                  NA
## 10 Cophixalus aenigma                        NA          5                  NA
## # ℹ 176 more rows
## # ℹ 6 more variables: `Northern Territory` <int>, Tasmania <int>,
## #   `Australian Capital Territory` <int>, `South Australia` <int>,
## #   Victoria <int>, `Other Territories` <int>
mydata %>%
  count(scientificName, stateProvince) %>%
  complete(scientificName, stateProvince, fill = list(n = 0))
## # A tibble: 1,674 × 3
##    scientificName  stateProvince                    n
##    <chr>           <chr>                        <int>
##  1 Adelotus brevis Australian Capital Territory     0
##  2 Adelotus brevis New South Wales                935
##  3 Adelotus brevis Northern Territory               0
##  4 Adelotus brevis Other Territories                0
##  5 Adelotus brevis Queensland                    1173
##  6 Adelotus brevis South Australia                  0
##  7 Adelotus brevis Tasmania                         0
##  8 Adelotus brevis Victoria                         0
##  9 Adelotus brevis Western Australia                0
## 10 Anstisia lutea  Australian Capital Territory     0
## # ℹ 1,664 more rows
mydata %>%
  fill(recordedBy, .direction = "down")
## # A tibble: 136,621 × 11
##    occurrenceID eventID decimalLatitude decimalLongitude scientificName         
##           <dbl>   <dbl>           <dbl>            <dbl> <chr>                  
##  1        12832  525618           -28.5             153. Philoria loveridgei    
##  2        12833  526341           -33.7             151. Heleioporus australiac…
##  3        12834  526673           -28.7             153. Mixophyes iteratus     
##  4        12835  526673           -28.7             153. Mixophyes fasciolatus  
##  5        12836  526673           -28.7             153. Litoria latopalmata    
##  6        12837  527056           -30.4             153. Assa darlingtoni       
##  7        12838  527058           -30.4             153. Assa darlingtoni       
##  8        12839  528103           -30.4             153  Litoria nasuta         
##  9        12840  528103           -30.4             153  Mixophyes iteratus     
## 10        12841  528103           -30.4             153  Litoria gracilenta     
## # ℹ 136,611 more rows
## # ℹ 6 more variables: eventDate <dttm>, eventTime <dttm>, timezone <chr>,
## #   coordinateUncertaintyInMeters <dbl>, recordedBy <dbl>, stateProvince <chr>