data <- read_excel("../00_data/MyData.xlsx")
data_small <- data %>%
select(Brand, Year, Kilometers_Driven)
data_small
## # A tibble: 100 × 3
## Brand Year Kilometers_Driven
## <chr> <dbl> <dbl>
## 1 Toyota 2018 50000
## 2 Honda 2019 40000
## 3 Ford 2017 20000
## 4 Maruti 2020 30000
## 5 Hyundai 2016 60000
## 6 Tata 2019 35000
## 7 Mahindra 2018 45000
## 8 Volkswagen 2020 25000
## 9 Audi 2017 30000
## 10 BMW 2019 20000
## # ℹ 90 more rows
data_small %>%
pivot_wider(names_from = Year,
values_from = Kilometers_Driven)
## Warning: Values from `Kilometers_Driven` are not uniquely identified; output will
## contain list-cols.
## • Use `values_fn = list` to suppress this warning.
## • Use `values_fn = {summary_fun}` to summarise duplicates.
## • Use the following dplyr code to identify duplicates.
## {data} |>
## dplyr::summarise(n = dplyr::n(), .by = c(Brand, Year)) |>
## dplyr::filter(n > 1L)
## # A tibble: 11 × 7
## Brand `2018` `2019` `2017` `2020` `2016` `2021`
## <chr> <list> <list> <list> <list> <list> <list>
## 1 Toyota <dbl [4]> <NULL> <dbl [2]> <dbl [2]> <dbl [2]> <NULL>
## 2 Honda <dbl [4]> <dbl [1]> <dbl [1]> <NULL> <NULL> <NULL>
## 3 Ford <dbl [2]> <dbl [4]> <dbl [4]> <dbl [1]> <NULL> <NULL>
## 4 Maruti <NULL> <dbl [2]> <NULL> <dbl [4]> <NULL> <NULL>
## 5 Hyundai <dbl [2]> <dbl [6]> <dbl [2]> <NULL> <dbl [1]> <NULL>
## 6 Tata <dbl [5]> <dbl [1]> <NULL> <dbl [5]> <NULL> <NULL>
## 7 Mahindra <dbl [1]> <dbl [2]> <NULL> <NULL> <NULL> <dbl [2]>
## 8 Volkswagen <dbl [1]> <dbl [2]> <dbl [4]> <dbl [3]> <NULL> <NULL>
## 9 Audi <dbl [4]> <NULL> <dbl [5]> <NULL> <dbl [1]> <NULL>
## 10 BMW <dbl [2]> <dbl [6]> <NULL> <dbl [2]> <NULL> <NULL>
## 11 Mercedes <dbl [3]> <dbl [4]> <dbl [3]> <NULL> <NULL> <NULL>
data_small_unique <- data_small %>%
slice(-10)
data_small_unique
## # A tibble: 99 × 3
## Brand Year Kilometers_Driven
## <chr> <dbl> <dbl>
## 1 Toyota 2018 50000
## 2 Honda 2019 40000
## 3 Ford 2017 20000
## 4 Maruti 2020 30000
## 5 Hyundai 2016 60000
## 6 Tata 2019 35000
## 7 Mahindra 2018 45000
## 8 Volkswagen 2020 25000
## 9 Audi 2017 30000
## 10 Mercedes 2018 28000
## # ℹ 89 more rows
data_wide <- data_small_unique %>%
pivot_wider(names_from = Year,
values_from = Kilometers_Driven)
## Warning: Values from `Kilometers_Driven` are not uniquely identified; output will
## contain list-cols.
## • Use `values_fn = list` to suppress this warning.
## • Use `values_fn = {summary_fun}` to summarise duplicates.
## • Use the following dplyr code to identify duplicates.
## {data} |>
## dplyr::summarise(n = dplyr::n(), .by = c(Brand, Year)) |>
## dplyr::filter(n > 1L)
data_long <- data_wide %>%
pivot_longer(`2018` : `2021`, names_to = "Year", values_to = "Kilometers_Driven", values_drop_na = TRUE)
data_long
## # A tibble: 36 × 3
## Brand Year Kilometers_Driven
## <chr> <chr> <list>
## 1 Toyota 2018 <dbl [4]>
## 2 Toyota 2017 <dbl [2]>
## 3 Toyota 2020 <dbl [2]>
## 4 Toyota 2016 <dbl [2]>
## 5 Honda 2018 <dbl [4]>
## 6 Honda 2019 <dbl [1]>
## 7 Honda 2017 <dbl [1]>
## 8 Ford 2018 <dbl [2]>
## 9 Ford 2019 <dbl [4]>
## 10 Ford 2017 <dbl [4]>
## # ℹ 26 more rows
##Note For Dr.Lee I belive all of the codes above are done correctly, since I was using the help from the videos. Im still very confused about the end result of some of the codes.
data_united <- data %>%
unite(col = "YrKm", Year:Kilometers_Driven, sep = "/", remove = TRUE)
data_united
## # A tibble: 100 × 11
## Brand Model YrKm Fuel_Type Transmission Owner_Type Mileage Engine Power
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Toyota Coro… 2018… Petrol Manual First 15 1498 108
## 2 Honda Civic 2019… Petrol Automatic Second 17 1597 140
## 3 Ford Must… 2017… Petrol Automatic First 10 4951 395
## 4 Maruti Swift 2020… Diesel Manual Third 23 1248 74
## 5 Hyundai Sona… 2016… Diesel Automatic Second 18 1999 194
## 6 Tata Nexon 2019… Petrol Manual First 17 1198 108
## 7 Mahindra Scor… 2018… Diesel Automatic Second 15 2179 140
## 8 Volkswagen Polo 2020… Petrol Automatic First 18 999 76
## 9 Audi A4 2017… Diesel Automatic First 18 1968 187
## 10 BMW X1 2019… Diesel Automatic Second 20 1995 190
## # ℹ 90 more rows
## # ℹ 2 more variables: Seats <dbl>, Price <dbl>
data_united %>%
separate(col = YrKm, into = c("Year", "Kilometers_Driven"), sep = "/")
## # A tibble: 100 × 12
## Brand Model Year Kilometers_Driven Fuel_Type Transmission Owner_Type Mileage
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 Toyo… Coro… 2018 50000 Petrol Manual First 15
## 2 Honda Civic 2019 40000 Petrol Automatic Second 17
## 3 Ford Must… 2017 20000 Petrol Automatic First 10
## 4 Maru… Swift 2020 30000 Diesel Manual Third 23
## 5 Hyun… Sona… 2016 60000 Diesel Automatic Second 18
## 6 Tata Nexon 2019 35000 Petrol Manual First 17
## 7 Mahi… Scor… 2018 45000 Diesel Automatic Second 15
## 8 Volk… Polo 2020 25000 Petrol Automatic First 18
## 9 Audi A4 2017 30000 Diesel Automatic First 18
## 10 BMW X1 2019 20000 Diesel Automatic Second 20
## # ℹ 90 more rows
## # ℹ 4 more variables: Engine <dbl>, Power <dbl>, Seats <dbl>, Price <dbl>
stocks <- tibble(
year = c(2019, 2019, 2019, 2019, 2020, 2020, 2020),
qtr = c( 1, 2, 3, 4, 2, 3, 4),
return = c(1.88, 0.59, 0.35, NA, 0.92, 0.17, 2.66)
)
stocks %>%
pivot_wider(names_from = year, values_from = return)
## # A tibble: 4 × 3
## qtr `2019` `2020`
## <dbl> <dbl> <dbl>
## 1 1 1.88 NA
## 2 2 0.59 0.92
## 3 3 0.35 0.17
## 4 4 NA 2.66
bikes <- tibble(
bike_model = c("A", "A", "B", "B", "C"),
material = c("steel", "aluminium", "steel", "aluminium", "steel"),
price = c(100, 200, 300, 400, 500)
)
bikes
## # A tibble: 5 × 3
## bike_model material price
## <chr> <chr> <dbl>
## 1 A steel 100
## 2 A aluminium 200
## 3 B steel 300
## 4 B aluminium 400
## 5 C steel 500
bikes %>%
pivot_wider(names_from = bike_model, values_from = price)
## # A tibble: 2 × 4
## material A B C
## <chr> <dbl> <dbl> <dbl>
## 1 steel 100 300 500
## 2 aluminium 200 400 NA
bikes %>%
complete(bike_model, material)
## # A tibble: 6 × 3
## bike_model material price
## <chr> <chr> <dbl>
## 1 A aluminium 200
## 2 A steel 100
## 3 B aluminium 400
## 4 B steel 300
## 5 C aluminium NA
## 6 C steel 500
treatment <- tribble(
~ person, ~ treatment, ~response,
"Derrick Whitmore", 6, 7,
NA, 2, 10,
NA, 3, 9,
"Katherine Burke", 1, 4
)
treatment %>%
fill(person, .direction = "up")
## # A tibble: 4 × 3
## person treatment response
## <chr> <dbl> <dbl>
## 1 Derrick Whitmore 6 7
## 2 Katherine Burke 2 10
## 3 Katherine Burke 3 9
## 4 Katherine Burke 1 4