Adding my data…
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Raw example data, one row per car. Row 5 is an exact repeat of row 1, and
# Miles is stored as comma-formatted text rather than as numbers.
car_data <- data.frame(
  ID           = c(1, 2, 3, 4, 1),
  Make         = c("Nissan", "Mazda", "Ford", "Mazda", "Nissan"),
  Car.Model    = c("Rogue", "CX-5", "Focus", "CX-3", "Rogue"),
  Miles        = c("180,000", "140,000", "80,000", "30,000", "180,000"),
  Car.Driver   = c("Sophia", "Betsy", "Nathan", "Tim", "Sophia"),
  Heated.Seats = c("No", "Yes", "No", "Yes", "No")
)
car_data
## ID Make Car.Model Miles Car.Driver Heated.Seats
## 1 1 Nissan Rogue 180,000 Sophia No
## 2 2 Mazda CX-5 140,000 Betsy Yes
## 3 3 Ford Focus 80,000 Nathan No
## 4 4 Mazda CX-3 30,000 Tim Yes
## 5 1 Nissan Rogue 180,000 Sophia No
Removing the unneeded Heated.Seats column and dropping duplicate rows…
# Drop Heated.Seats by name rather than by position (car_data[1:5]) so the
# right column is removed even if columns are later added or reordered,
# then collapse rows that are exact duplicates of an earlier row.
cars_data <- car_data %>%
  select(-Heated.Seats) %>%
  distinct()
cars_data
## ID Make Car.Model Miles Car.Driver
## 1 1 Nissan Rogue 180,000 Sophia
## 2 2 Mazda CX-5 140,000 Betsy
## 3 3 Ford Focus 80,000 Nathan
## 4 4 Mazda CX-3 30,000 Tim
Renaming columns and checking for missing data…
# Shorten the two prefixed column names: Car.Model -> Model and
# Car.Driver -> Driver. All other columns are left as they are.
cars_data <- cars_data %>%
  rename(
    Model  = Car.Model,
    Driver = Car.Driver
  )
cars_data
## ID Make Model Miles Driver
## 1 1 Nissan Rogue 180,000 Sophia
## 2 2 Mazda CX-5 140,000 Betsy
## 3 3 Ford Focus 80,000 Nathan
## 4 4 Mazda CX-3 30,000 Tim
# TRUE if any cell in cars_data is NA; FALSE means there is nothing to impute
# or drop.
anyNA(cars_data, recursive = FALSE)
## [1] FALSE
Converting Miles from comma-formatted text to numeric values…
# Miles arrives as text such as "180,000": strip the thousands separators
# first, then convert what is left to a number.
cars_data$Miles <- cars_data$Miles %>%
  str_replace_all("[,]", "") %>%
  as.numeric()
cars_data
## ID Make Model Miles Driver
## 1 1 Nissan Rogue 180000 Sophia
## 2 2 Mazda CX-5 140000 Betsy
## 3 3 Ford Focus 80000 Nathan
## 4 4 Mazda CX-3 30000 Tim
Appending a new car and merging in the color data…
# One additional car, using the column names ID, Make, Model, Miles and
# Driver. Miles is comma-formatted text here.
old_cars <- data.frame(
  ID     = 5,
  Make   = "Dodge",
  Model  = "Charger",
  Miles  = "20,000",
  Driver = "Tim"
)
old_cars
## ID Make Model Miles Driver
## 1 5 Dodge Charger 20,000 Tim
# Remove the thousands separators from Miles and store the result as a
# number instead of text.
old_cars[["Miles"]] <- as.numeric(
  str_replace_all(old_cars[["Miles"]], "[,]", "")
)
old_cars
## ID Make Model Miles Driver
## 1 5 Dodge Charger 20000 Tim
# Stack the rows of old_cars underneath the existing rows of cars_data.
cars_data <- rbind(
  cars_data,
  old_cars
)
cars_data
## ID Make Model Miles Driver
## 1 1 Nissan Rogue 180000 Sophia
## 2 2 Mazda CX-5 140000 Betsy
## 3 3 Ford Focus 80000 Nathan
## 4 4 Mazda CX-3 30000 Tim
## 5 5 Dodge Charger 20000 Tim
# Lookup table giving one Color value for each ID from 1 to 5.
extra_data <- data.frame(
  ID    = c(1, 2, 3, 4, 5),
  Color = c("Grey", "Red", "Black", "Black", "Navy")
)
extra_data
## ID Color
## 1 1 Grey
## 2 2 Red
## 3 3 Black
## 4 4 Black
## 5 5 Navy
# Attach Color to each car by ID. merge() defaults to an inner join, which
# would silently drop any car that has no row in extra_data; all.x = TRUE
# keeps every car and leaves Color as NA when no match exists.
cars_data <- merge(cars_data, extra_data, by = "ID", all.x = TRUE)
cars_data
## ID Make Model Miles Driver Color
## 1 1 Nissan Rogue 180000 Sophia Grey
## 2 2 Mazda CX-5 140000 Betsy Red
## 3 3 Ford Focus 80000 Nathan Black
## 4 4 Mazda CX-3 30000 Tim Black
## 5 5 Dodge Charger 20000 Tim Navy
Final data set…
# Show the cleaned and combined table.
print(cars_data)
## ID Make Model Miles Driver Color
## 1 1 Nissan Rogue 180000 Sophia Grey
## 2 2 Mazda CX-5 140000 Betsy Red
## 3 3 Ford Focus 80000 Nathan Black
## 4 4 Mazda CX-3 30000 Tim Black
## 5 5 Dodge Charger 20000 Tim Navy