library(tidyr)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(ggplot2)library(tidyr)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(ggplot2)

# Read the laptop price CSV straight from its raw GitHub URL and inspect
# the column names and types before any reshaping.
url <- "https://raw.githubusercontent.com/AslamF/DATA607-Project-2/refs/heads/main/Laptop%20Price%20Dataset.csv"
raw_data <- read.csv(file = url)
raw_data |>
  glimpse()
Rows: 1,303
Columns: 12
$ Unnamed..0 <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,…
$ Company <chr> "Apple", "Apple", "HP", "Apple", "Apple", "Acer", "Ap…
$ TypeName <chr> "Ultrabook", "Ultrabook", "Notebook", "Ultrabook", "U…
$ Inches <chr> "13.3", "13.3", "15.6", "15.4", "13.3", "15.6", "15.4…
$ ScreenResolution <chr> "IPS Panel Retina Display 2560x1600", "1440x900", "Fu…
$ Cpu <chr> "Intel Core i5 2.3GHz", "Intel Core i5 1.8GHz", "Inte…
$ Ram <chr> "8GB", "8GB", "8GB", "16GB", "8GB", "4GB", "16GB", "8…
$ Memory <chr> "128GB SSD", "128GB Flash Storage", "256GB SSD", "512…
$ Gpu <chr> "Intel Iris Plus Graphics 640", "Intel HD Graphics 60…
$ OpSys <chr> "macOS", "macOS", "No OS", "macOS", "macOS", "Windows…
$ Weight <chr> "1.37kg", "1.34kg", "1.86kg", "1.83kg", "1.37kg", "2.…
$ Price <dbl> 71378.68, 47895.52, 30636.00, 135195.34, 96095.81, 21…
# Reshape from wide to long: each hardware attribute column is stacked so
# that every laptop contributes one row per spec. The former column name
# lands in `spec` and its cell content in `spec_value`.
tidy_laptops <- pivot_longer(
  raw_data,
  cols = c(Inches, ScreenResolution, Cpu, Ram, Memory, Gpu, OpSys, Weight),
  names_to = "spec",
  values_to = "spec_value"
)
# Standardise column names (lower case, descriptive identifiers) and make
# sure `price` is numeric.
tidy_laptops <- tidy_laptops |>
  rename_with(tolower) |>
  rename(
    laptop_id = unnamed..0,
    brand = company,
    type = typename
  ) |>
  mutate(
    # Only strip stray symbols when price arrived as text. Round-tripping an
    # already-numeric column through as.character() is unsafe: a value such
    # as 100000 is rendered as "1e+05", and removing the non-digit characters
    # would silently turn it into 105 (and would also drop any minus sign).
    price = if (is.numeric(price)) {
      price
    } else {
      as.numeric(gsub("[^0-9.]", "", as.character(price)))
    }
  )
# Keep only rows that have a usable spec value (neither NA nor an empty
# string) and a known price.
tidy_laptops <- filter(
  tidy_laptops,
  !(is.na(spec_value) | spec_value == ""),
  !is.na(price)
)
# Convert price from INR to USD
# The exchange rate (USD per 1 INR) is hard-coded for this analysis; it is
# named here so it can be reviewed or updated in a single place.
inr_to_usd_rate <- 0.012
tidy_laptops <- tidy_laptops |>
  mutate(price_usd = round(price * inr_to_usd_rate, 2))
# Preview the first 20 rows of the long-format table.
print(tidy_laptops, n = 20)
# A tibble: 10,184 × 7
laptop_id brand type price spec spec_value price_usd
<int> <chr> <chr> <dbl> <chr> <chr> <dbl>
1 0 Apple Ultrabook 71379. Inches 13.3 857.
2 0 Apple Ultrabook 71379. ScreenResolution IPS Panel Retina… 857.
3 0 Apple Ultrabook 71379. Cpu Intel Core i5 2.… 857.
4 0 Apple Ultrabook 71379. Ram 8GB 857.
5 0 Apple Ultrabook 71379. Memory 128GB SSD 857.
6 0 Apple Ultrabook 71379. Gpu Intel Iris Plus … 857.
7 0 Apple Ultrabook 71379. OpSys macOS 857.
8 0 Apple Ultrabook 71379. Weight 1.37kg 857.
9 1 Apple Ultrabook 47896. Inches 13.3 575.
10 1 Apple Ultrabook 47896. ScreenResolution 1440x900 575.
11 1 Apple Ultrabook 47896. Cpu Intel Core i5 1.… 575.
12 1 Apple Ultrabook 47896. Ram 8GB 575.
13 1 Apple Ultrabook 47896. Memory 128GB Flash Stor… 575.
14 1 Apple Ultrabook 47896. Gpu Intel HD Graphic… 575.
15 1 Apple Ultrabook 47896. OpSys macOS 575.
16 1 Apple Ultrabook 47896. Weight 1.34kg 575.
17 2 HP Notebook 30636 Inches 15.6 368.
18 2 HP Notebook 30636 ScreenResolution Full HD 1920x1080 368.
19 2 HP Notebook 30636 Cpu Intel Core i5 72… 368.
20 2 HP Notebook 30636 Ram 8GB 368.
# ℹ 10,164 more rows
# Average price per RAM size, most expensive first.
# NOTE(review): this table averages `price` (the original INR amounts),
# whereas the chart further down averages `price_usd` — confirm that the
# mixed units are intended.
tidy_laptops |>
  filter(spec == "Ram") |>
  group_by(spec_value) |>
  summarise(
    avg_price = mean(price, na.rm = TRUE) |>
      round(digits = 2)
  ) |>
  arrange(desc(avg_price)) |>
  print()
# A tibble: 10 × 2
spec_value avg_price
<chr> <dbl>
1 32GB 181849.
2 24GB 117553.
3 64GB 117512.
4 16GB 103158.
5 12GB 66037.
6 8GB 63161.
7 1GB 53227.
8 6GB 32826.
9 4GB 30553.
10 2GB 14757.
# Horizontal bar chart of the mean USD price for each RAM size; bars are
# ordered by average price rather than by RAM capacity.
tidy_laptops |>
  filter(spec == "Ram") |>
  group_by(spec_value) |>
  summarise(avg_price = mean(price_usd, na.rm = TRUE)) |>
  ggplot(
    mapping = aes(
      x = reorder(spec_value, avg_price),
      y = avg_price
    )
  ) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(title = "Average Laptop Price by RAM", x = "RAM", y = "Average Price (USD)") +
theme_minimal()
The dataset required significant cleaning before any analysis could be done. Column names were inconsistent and the price column had non-numeric values. After completing the assignment I was shocked to see a laptop priced at 150,000… This caused me to take another look at the data and realize the prices were being shown in Indian Rupees! I had to go back and mutate the data with an appropriate conversion rate and then show the USD amount, which is easier to interpret and understand. The data is very clear: more RAM generally means a higher price. 32GB costs a significant premium, and the average user with 8GB of RAM should expect to spend around 750 USD.