CodeAlong7

Opening a semicolon-separated file:

# Read the semicolon-delimited text file into a tibble.
# - text after the "!-" comment marker is ignored
# - the file has no header row, so column names are supplied explicitly
# - "nuh-uh" is the file's missing-value marker
# - decimals use "," as the decimal mark
# - num4 is parsed as a number and date is parsed as month/day/2-digit-year
imported_data <- read_delim(
  "data.txt",
  delim = ";",
  comment = "!-",
  col_names = c("num1", "num2", "num3", "num4", "date"),
  col_types = cols(
    num4 = col_number(),
    date = col_date("%m/%d/%y")
  ),
  na = "nuh-uh",
  locale = locale(decimal_mark = ","),
  # Must be a logical: the string "FALSE" only suppressed the message by
  # accident (and "TRUE" would silently fail to enable it).
  show_col_types = FALSE
)

# Print the imported tibble to inspect the parsed columns.
imported_data

## # A tibble: 4 × 5
##    num1  num2  num3  num4 date      
##   <dbl> <dbl> <dbl> <dbl> <date>    
## 1  12    13      17  2    2025-01-03
## 2   9    NA       6  3    2026-01-03
## 3   1     2       4  7    2005-01-05
## 4   1.5   7.6     2  1.12 2024-10-12

Saving the data to a new file

# Write the imported tibble back out as a comma-separated file.
write_csv(x = imported_data, file = "new_data.csv")

Tidy data

Pivoting

Wide to long form

# Gather the two year columns of table4a into a `year` / `cases` pair,
# producing one row per country and year.
table4a_long <- pivot_longer(
  table4a,
  cols = c("1999", "2000"),
  names_to = "year",
  values_to = "cases"
)
table4a_long

## # A tibble: 6 × 3
##   country     year   cases
##   <chr>       <chr>  <dbl>
## 1 Afghanistan 1999     745
## 2 Afghanistan 2000    2666
## 3 Brazil      1999   37737
## 4 Brazil      2000   80488
## 5 China       1999  212258
## 6 China       2000  213766

Long to wide form

# Spread the `year` values back into their own columns, filled from `cases`.
pivot_wider(
  table4a_long,
  names_from = year,
  values_from = cases
)

## # A tibble: 3 × 3
##   country     `1999` `2000`
##   <chr>        <dbl>  <dbl>
## 1 Afghanistan    745   2666
## 2 Brazil       37737  80488
## 3 China       212258 213766

Separating and Uniting

Separate a column

# Split the combined `rate` column of table3 into `cases` and `population`
# (default separator; both new columns stay character).
table_sep <- separate(
  table3,
  col = rate,
  into = c("cases", "population")
)
table_sep

## # A tibble: 6 × 4
##   country      year cases  population
##   <chr>       <dbl> <chr>  <chr>     
## 1 Afghanistan  1999 745    19987071  
## 2 Afghanistan  2000 2666   20595360  
## 3 Brazil       1999 37737  172006362 
## 4 Brazil       2000 80488  174504898 
## 5 China        1999 212258 1272915272
## 6 China        2000 213766 1280428583

Unite two columns

# Paste `cases` and `population` back together into a single `rate` column,
# joined with "/".
unite(
  table_sep,
  col = "rate",
  cases:population,
  sep = "/"
)

## # A tibble: 6 × 3
##   country      year rate             
##   <chr>       <dbl> <chr>            
## 1 Afghanistan  1999 745/19987071     
## 2 Afghanistan  2000 2666/20595360    
## 3 Brazil       1999 37737/172006362  
## 4 Brazil       2000 80488/174504898  
## 5 China        1999 212258/1272915272
## 6 China        2000 213766/1280428583

Missing Values

# Example returns by year and quarter, written row by row.
# 2015 Q4 has an explicit NA; 2016 Q1 has no row at all.
stocks <- tribble(
  ~year, ~qtr, ~return,
  2015,  1,    1.88,
  2015,  2,    0.59,
  2015,  3,    0.35,
  2015,  4,    NA,
  2016,  2,    0.92,
  2016,  3,    0.17,
  2016,  4,    2.66
)

# One column per year: combinations with no row in `stocks` show up as NA.
pivot_wider(
  stocks,
  names_from = year,
  values_from = return
)

## # A tibble: 4 × 3
##     qtr `2015` `2016`
##   <dbl>  <dbl>  <dbl>
## 1     1   1.88  NA   
## 2     2   0.59   0.92
## 3     3   0.35   0.17
## 4     4  NA      2.66

# Example price list, written row by row. Model "C" has no aluminum row.
bikes <- tribble(
  ~bike_model, ~material,  ~price,
  "A",         "steel",    100,
  "A",         "aluminum", 200,
  "B",         "steel",    300,
  "B",         "aluminum", 400,
  "C",         "steel",    500
)

# One column per bike model: the missing C/aluminum combination becomes NA.
pivot_wider(
  bikes,
  names_from = bike_model,
  values_from = price
)

## # A tibble: 2 × 4
##   material     A     B     C
##   <chr>    <dbl> <dbl> <dbl>
## 1 steel      100   300   500
## 2 aluminum   200   400    NA

# Add a row for every bike_model x material combination, with NA price
# where the combination was absent.
complete(bikes, bike_model, material)

## # A tibble: 6 × 3
##   bike_model material price
##   <chr>      <chr>    <dbl>
## 1 A          aluminum   200
## 2 A          steel      100
## 3 B          aluminum   400
## 4 B          steel      300
## 5 C          aluminum    NA
## 6 C          steel      500

# Example table where a person's name is recorded only on their first row;
# the following rows carry NA in `person`.
treatement <- tibble(
  person = c("Derrick", NA, NA, "Kthrine"),
  treatment = c(1, 2, 3, 1),
  response = c(7, 10, 9, 4)
)

# Replace each NA in `person` with the most recent non-missing value above it.
fill(
  treatement,
  person,
  .direction = "down"
)

## # A tibble: 4 × 3
##   person  treatment response
##   <chr>       <dbl>    <dbl>
## 1 Derrick         1        7
## 2 Derrick         2       10
## 3 Derrick         3        9
## 4 Kthrine         1        4

Non-Tidy Data

Data from external source

Find data

# Download the ratings CSV straight from the TidyTuesday repository,
# without printing the column specification.
ratings <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2022/2022-01-25/ratings.csv",
  show_col_types = FALSE
)
ratings

## # A tibble: 21,831 × 10
##      num     id name          year  rank average bayes_average users_rated url  
##    <dbl>  <dbl> <chr>        <dbl> <dbl>   <dbl>         <dbl>       <dbl> <chr>
##  1   105  30549 Pandemic      2008   106    7.59          7.49      108975 /boa…
##  2   189    822 Carcassonne   2000   190    7.42          7.31      108738 /boa…
##  3   428     13 Catan         1995   429    7.14          6.97      108024 /boa…
##  4    72  68448 7 Wonders     2010    73    7.74          7.63       89982 /boa…
##  5   103  36218 Dominion      2008   104    7.61          7.50       81561 /boa…
##  6   191   9209 Ticket to R…  2004   192    7.41          7.30       76171 /boa…
##  7   100 178900 Codenames     2015   101    7.6           7.51       74419 /boa…
##  8     3 167791 Terraformin…  2016     4    8.42          8.27       74216 /boa…
##  9    15 173346 7 Wonders D…  2015    16    8.11          7.98       69472 /boa…
## 10    35  31260 Agricola      2007    36    7.93          7.81       66093 /boa…
## # ℹ 21,821 more rows
## # ℹ 1 more variable: thumbnail <chr>

Save data

# Keep a local copy of the downloaded ratings data.
write_csv(ratings, file = "ratings.csv")

CodeAlong7

2026-03-02

Opening a semicolon-separated file:

Saving the data to a new file

Tidy data

Pivoting

Wide to long form

Long to wide form

Separating and Uniting

Separate a column

Unite two columns

Missing Values

Non-Tidy Data

Data from external source

Find data

Save data