Import your data
# excel file
data <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2019/2019-06-25/ufo_sightings.csv") %>%
janitor::clean_names() %>%
# Remove NA from state
filter(!is.na(state))
## Rows: 80332 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): date_time, city_area, state, country, ufo_shape, described_encounte...
## dbl (3): encounter_length, latitude, longitude
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data
## # A tibble: 74,535 × 11
## date_time city_area state country ufo_shape encounter_length
## <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 10/10/1949 20:30 san marcos tx us cylinder 2700
## 2 10/10/1949 21:00 lackland afb tx <NA> light 7200
## 3 10/10/1956 21:00 edna tx us circle 20
## 4 10/10/1960 20:00 kaneohe hi us light 900
## 5 10/10/1961 19:00 bristol tn us sphere 300
## 6 10/10/1965 23:45 norwalk ct us disk 1200
## 7 10/10/1966 20:00 pell city al us disk 180
## 8 10/10/1966 21:00 live oak fl us disk 120
## 9 10/10/1968 13:00 hawthorne ca us circle 300
## 10 10/10/1968 19:00 brevard nc us fireball 180
## # ℹ 74,525 more rows
## # ℹ 5 more variables: described_encounter_length <chr>, description <chr>,
## # date_documented <chr>, latitude <dbl>, longitude <dbl>
Pivoting
table4a
## # A tibble: 3 × 3
## country `1999` `2000`
## <chr> <dbl> <dbl>
## 1 Afghanistan 745 2666
## 2 Brazil 37737 80488
## 3 China 212258 213766
table4a_long <- table4a %>%
pivot_longer(cols = c('1999', '2000'),
names_to = 'year',
values_to = "cases")
table4a_long
## # A tibble: 6 × 3
## country year cases
## <chr> <chr> <dbl>
## 1 Afghanistan 1999 745
## 2 Afghanistan 2000 2666
## 3 Brazil 1999 37737
## 4 Brazil 2000 80488
## 5 China 1999 212258
## 6 China 2000 213766
long to wide form
wide to long form
Separating and Uniting
table3
## # A tibble: 6 × 3
## country year rate
## <chr> <dbl> <chr>
## 1 Afghanistan 1999 745/19987071
## 2 Afghanistan 2000 2666/20595360
## 3 Brazil 1999 37737/172006362
## 4 Brazil 2000 80488/174504898
## 5 China 1999 212258/1272915272
## 6 China 2000 213766/1280428583
table3_sep <- table3 %>%
separate(col = year, into = c("cases", "ufo_shape"))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 6 rows [1, 2, 3, 4, 5,
## 6].
table3_sep
## # A tibble: 6 × 4
## country cases ufo_shape rate
## <chr> <chr> <chr> <chr>
## 1 Afghanistan 1999 <NA> 745/19987071
## 2 Afghanistan 2000 <NA> 2666/20595360
## 3 Brazil 1999 <NA> 37737/172006362
## 4 Brazil 2000 <NA> 80488/174504898
## 5 China 1999 <NA> 212258/1272915272
## 6 China 2000 <NA> 213766/1280428583
Unite two columns
table3_sep %>%
unite(col = "rate", c(country, ufo_shape), sep = "/")
## # A tibble: 6 × 2
## rate cases
## <chr> <chr>
## 1 Afghanistan/NA 1999
## 2 Afghanistan/NA 2000
## 3 Brazil/NA 1999
## 4 Brazil/NA 2000
## 5 China/NA 1999
## 6 China/NA 2000
Separate a column
Unite two columns
Missing Values