Week 8: Apply it to your data 7

Import your data

# excel file
data <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2019/2019-06-25/ufo_sightings.csv") %>%
    janitor::clean_names() %>%
    # Remove NA from state
    filter(!is.na(state))

## Rows: 80332 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): date_time, city_area, state, country, ufo_shape, described_encounte...
## dbl (3): encounter_length, latitude, longitude
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

data

## # A tibble: 74,535 × 11
##    date_time        city_area    state country ufo_shape encounter_length
##    <chr>            <chr>        <chr> <chr>   <chr>                <dbl>
##  1 10/10/1949 20:30 san marcos   tx    us      cylinder              2700
##  2 10/10/1949 21:00 lackland afb tx    <NA>    light                 7200
##  3 10/10/1956 21:00 edna         tx    us      circle                  20
##  4 10/10/1960 20:00 kaneohe      hi    us      light                  900
##  5 10/10/1961 19:00 bristol      tn    us      sphere                 300
##  6 10/10/1965 23:45 norwalk      ct    us      disk                  1200
##  7 10/10/1966 20:00 pell city    al    us      disk                   180
##  8 10/10/1966 21:00 live oak     fl    us      disk                   120
##  9 10/10/1968 13:00 hawthorne    ca    us      circle                 300
## 10 10/10/1968 19:00 brevard      nc    us      fireball               180
## # ℹ 74,525 more rows
## # ℹ 5 more variables: described_encounter_length <chr>, description <chr>,
## #   date_documented <chr>, latitude <dbl>, longitude <dbl>

Pivoting

table4a

## # A tibble: 3 × 3
##   country     `1999` `2000`
##   <chr>        <dbl>  <dbl>
## 1 Afghanistan    745   2666
## 2 Brazil       37737  80488
## 3 China       212258 213766

table4a_long <- table4a %>%
    
    pivot_longer(cols = c('1999', '2000'),
                 names_to = 'year',
                 values_to = "cases")

table4a_long

## # A tibble: 6 × 3
##   country     year   cases
##   <chr>       <chr>  <dbl>
## 1 Afghanistan 1999     745
## 2 Afghanistan 2000    2666
## 3 Brazil      1999   37737
## 4 Brazil      2000   80488
## 5 China       1999  212258
## 6 China       2000  213766

long to wide form

wide to long form

Separating and Uniting

table3

## # A tibble: 6 × 3
##   country      year rate             
##   <chr>       <dbl> <chr>            
## 1 Afghanistan  1999 745/19987071     
## 2 Afghanistan  2000 2666/20595360    
## 3 Brazil       1999 37737/172006362  
## 4 Brazil       2000 80488/174504898  
## 5 China        1999 212258/1272915272
## 6 China        2000 213766/1280428583

table3_sep <- table3 %>%
    
    separate(col = year, into = c("cases", "ufo_shape"))

## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 6 rows [1, 2, 3, 4, 5,
## 6].

table3_sep

## # A tibble: 6 × 4
##   country     cases ufo_shape rate             
##   <chr>       <chr> <chr>     <chr>            
## 1 Afghanistan 1999  <NA>      745/19987071     
## 2 Afghanistan 2000  <NA>      2666/20595360    
## 3 Brazil      1999  <NA>      37737/172006362  
## 4 Brazil      2000  <NA>      80488/174504898  
## 5 China       1999  <NA>      212258/1272915272
## 6 China       2000  <NA>      213766/1280428583

Unite two columns

table3_sep %>%
    
    unite(col = "rate", c(country, ufo_shape), sep = "/")

## # A tibble: 6 × 2
##   rate           cases
##   <chr>          <chr>
## 1 Afghanistan/NA 1999 
## 2 Afghanistan/NA 2000 
## 3 Brazil/NA      1999 
## 4 Brazil/NA      2000 
## 5 China/NA       1999 
## 6 China/NA       2000

Week 8: Apply it to your data 7

Daniel Lee

2022-10-05

Import your data

Pivoting

long to wide form

wide to long form

Separating and Uniting

Unite two columns

Separate a column

Unite two columns

Missing Values