# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Read the laptop price CSV into a data frame, then print a compact summary
# of its columns and types as a sanity check on the import.
data <- read.csv(
  file = "~/Documents/statistics(1)/laptop_prices.csv"
)
str(object = data)
## 'data.frame': 1275 obs. of 23 variables:
## $ Company : chr "Apple" "Apple" "HP" "Apple" ...
## $ Product : chr "MacBook Pro" "Macbook Air" "250 G6" "MacBook Pro" ...
## $ TypeName : chr "Ultrabook" "Ultrabook" "Notebook" "Ultrabook" ...
## $ Inches : num 13.3 13.3 15.6 15.4 13.3 15.6 15.4 13.3 14 14 ...
## $ Ram : int 8 8 8 16 8 4 16 8 16 8 ...
## $ OS : chr "macOS" "macOS" "No OS" "macOS" ...
## $ Weight : num 1.37 1.34 1.86 1.83 1.37 2.1 2.04 1.34 1.3 1.6 ...
## $ Price_euros : num 1340 899 575 2537 1804 ...
## $ Screen : chr "Standard" "Standard" "Full HD" "Standard" ...
## $ ScreenW : int 2560 1440 1920 2880 2560 1366 2880 1440 1920 1920 ...
## $ ScreenH : int 1600 900 1080 1800 1600 768 1800 900 1080 1080 ...
## $ Touchscreen : chr "No" "No" "No" "No" ...
## $ IPSpanel : chr "Yes" "No" "No" "Yes" ...
## $ RetinaDisplay : chr "Yes" "No" "No" "Yes" ...
## $ CPU_company : chr "Intel" "Intel" "Intel" "Intel" ...
## $ CPU_freq : num 2.3 1.8 2.5 2.7 3.1 3 2.2 1.8 1.8 1.6 ...
## $ CPU_model : chr "Core i5" "Core i5" "Core i5 7200U" "Core i7" ...
## $ PrimaryStorage : int 128 128 256 512 256 500 256 256 512 256 ...
## $ SecondaryStorage : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PrimaryStorageType : chr "SSD" "Flash Storage" "SSD" "SSD" ...
## $ SecondaryStorageType: chr "No" "No" "No" "No" ...
## $ GPU_company : chr "Intel" "Intel" "Intel" "AMD" ...
## $ GPU_model : chr "Iris Plus Graphics 640" "HD Graphics 6000" "HD Graphics 620" "Radeon Pro 455" ...
# Create derived columns 'Total_Storage' and 'Price_per_GB'.
#   Total_Storage: PrimaryStorage + SecondaryStorage.
#   Price_per_GB:  Price_euros / Total_Storage.
# NOTE(review): the storage columns are presumably in GB, as the name
# 'Price_per_GB' implies -- confirm against the data source.
#
# A row whose Total_Storage is 0 would make the division return Inf. A single
# Inf makes cor() return NaN (use = "complete.obs" only drops NA) and makes
# lm() fail, so such rows get NA instead and are skipped downstream.
data <- data %>%
  mutate(
    Total_Storage = PrimaryStorage + SecondaryStorage,
    Price_per_GB = if_else(
      Total_Storage > 0,
      Price_euros / Total_Storage,
      NA_real_
    )
  )
# Display the first few rows of the relevant columns for verification
data %>%
  select(Price_euros, Weight, Total_Storage, Price_per_GB) %>%
  head()
## Price_euros Weight Total_Storage Price_per_GB
## 1 1339.69 1.37 128 10.466328
## 2 898.94 1.34 128 7.022969
## 3 575.00 1.86 256 2.246094
## 4 2537.45 1.83 512 4.955957
## 5 1803.60 1.37 256 7.045312
## 6 400.00 2.10 500 0.800000
# Two pairs of numeric variables are analysed, each with Price_euros as the response variable:
# Pair 1: Price_euros (response variable) and Weight (explanatory variable)
# Pair 2: Price_euros (response variable) and Price_per_GB (explanatory variable, derived by dividing Price_euros by Total_Storage)
# Scatter plot 1: price (y-axis) against weight (x-axis), solid blue points.
with(
  data,
  plot(
    x = Weight,
    y = Price_euros,
    main = "Relationship between Weight and Price",
    xlab = "Weight (kg)",
    ylab = "Price (Euros)",
    col = "blue",
    pch = 19
  )
)
# Overlay the least-squares line of price regressed on weight, in red.
abline(lm(Price_euros ~ Weight, data = data), col = "red")
# A scatter plot is used to visualise the relationship between Price and Weight. The points are widely dispersed around the red trend line, which suggests that weight is not a strong predictor of price; the relationship is weak.
# Some high-priced laptops stand out, especially among those with similar weights. These are likely high-end models.
# Scatter plot 2: price (y-axis) against price per GB (x-axis), solid red
# points.
with(
  data,
  plot(
    x = Price_per_GB,
    y = Price_euros,
    main = "Relationship between Price per GB and Price",
    xlab = "Price per GB (Euros)",
    ylab = "Price (Euros)",
    col = "red",
    pch = 19
  )
)
# Overlay the least-squares line of price regressed on price per GB, in blue.
abline(lm(Price_euros ~ Price_per_GB, data = data), col = "blue")
# There is a positive relationship between Price_per_GB and the overall Price_euros, as expected—laptops with a higher price per GB tend to be more expensive overall.
# Extremely high Price_per_GB values are most likely due to low storage capacities, which make the price per unit of storage disproportionately high.
# Pearson correlation of price with each explanatory variable. Rows with a
# missing value in either variable of a pair are dropped ("complete.obs").
cor_price_weight <- with(
  data,
  cor(Price_euros, Weight, use = "complete.obs")
)
cor_price_price_per_gb <- with(
  data,
  cor(Price_euros, Price_per_GB, use = "complete.obs")
)
# Print both coefficients
cor_price_weight
## [1] 0.2118834
cor_price_price_per_gb
## [1] 0.1206383
# Although the upward-sloping trend line for Price_euros and Price_per_GB indicates a positive relationship, the correlation between them is only 0.12, so the relationship is weak.
# The correlation between Price_euros and Weight (0.21) also indicates a weak positive relationship: as weight increases, the price tends to increase slightly, but the relationship is not strong.
# 95% t-based confidence interval for the mean of Price_euros.
# Missing prices are excluded from the mean, the standard deviation and the
# sample size. If the column is absent, a message is printed instead.
if (!("Price_euros" %in% colnames(data))) {
  cat("Price_euros column not found in the dataset.\n")
} else {
  # Number of non-missing prices, shared by the standard error and the
  # degrees of freedom below.
  n_price <- sum(!is.na(data$Price_euros))
  # Sample mean and standard deviation
  mean_price <- mean(data$Price_euros, na.rm = TRUE)
  sd_price <- sd(data$Price_euros, na.rm = TRUE)
  # Standard error of the mean
  se_price <- sd_price / sqrt(n_price)
  # Two-sided 95% critical value from the t distribution with n - 1 degrees
  # of freedom (the interval assumes an approximately normal sample mean)
  t_value <- qt(0.975, df = n_price - 1)
  # Half-width of the interval, then its two end points
  margin_of_error <- t_value * se_price
  lower_bound <- mean_price - margin_of_error
  upper_bound <- mean_price + margin_of_error
  # Report the interval
  cat("95% Confidence Interval for Price_euros: [", lower_bound, ",", upper_bound, "]\n")
}
## 95% Confidence Interval for Price_euros: [ 1096.468 , 1173.47 ]
# We can conclude with 95% confidence that the true average price of laptops in the population falls between €1096.468 and €1173.47. This interval gives an idea of the price one might expect for the average laptop in the market.
# The weak correlations found above suggest that factors other than weight and price per GB, such as Company, GPU, or RAM size, could be more significant predictors of laptop prices.