# Load the laptop price dataset and preview its first rows.
# NOTE(review): the path below is specific to one machine; point
# `laptop_csv_path` at the local copy of the file (ideally a project-relative
# path) before running elsewhere.
laptop_csv_path <- "/Users/Lenovo/Downloads/Laptop_price.csv"
laptop_data <- read.csv(laptop_csv_path)
head(laptop_data)
## Brand Processor_Speed RAM_Size Storage_Capacity Screen_Size Weight
## 1 Asus 3.830296 16 512 11.18515 2.641094
## 2 Acer 2.912833 4 1000 11.31137 3.260012
## 3 Lenovo 3.241627 4 256 11.85302 2.029061
## 4 Acer 3.806248 16 512 12.28036 4.573865
## 5 Acer 3.268097 32 1000 14.99088 4.193472
## 6 HP 1.881348 16 256 11.94396 4.840268
## Price
## 1 17395.093
## 2 31607.606
## 3 9291.024
## 4 17436.728
## 5 32917.991
## 6 9543.720
# Tally the missing values in each column of the raw import.
vapply(laptop_data, function(column) sum(is.na(column)), numeric(1))
## Brand Processor_Speed RAM_Size Storage_Capacity
## 0 0 0 0
## Screen_Size Weight Price
## 0 0 0
# Drop every row that has a missing value in any column, then preview the
# result; all later steps work on this cleaned frame.
laptop_data_clean <- stats::na.omit(laptop_data)
head(laptop_data_clean, n = 6L)
## Brand Processor_Speed RAM_Size Storage_Capacity Screen_Size Weight
## 1 Asus 3.830296 16 512 11.18515 2.641094
## 2 Acer 2.912833 4 1000 11.31137 3.260012
## 3 Lenovo 3.241627 4 256 11.85302 2.029061
## 4 Acer 3.806248 16 512 12.28036 4.573865
## 5 Acer 3.268097 32 1000 14.99088 4.193472
## 6 HP 1.881348 16 256 11.94396 4.840268
## Price
## 1 17395.093
## 2 31607.606
## 3 9291.024
## 4 17436.728
## 5 32917.991
## 6 9543.720
# Inspect column types on the cleaned frame, because every later step
# (plots, Boruta, lm) reads laptop_data_clean rather than the raw import.
str(laptop_data_clean)
## 'data.frame': 1000 obs. of 7 variables:
## $ Brand : chr "Asus" "Acer" "Lenovo" "Acer" ...
## $ Processor_Speed : num 3.83 2.91 3.24 3.81 3.27 ...
## $ RAM_Size : int 16 4 4 16 32 16 32 4 4 32 ...
## $ Storage_Capacity: int 512 1000 256 512 1000 256 256 256 512 512 ...
## $ Screen_Size : num 11.2 11.3 11.9 12.3 15 ...
## $ Weight : num 2.64 3.26 2.03 4.57 4.19 ...
## $ Price : num 17395 31608 9291 17437 32918 ...
# Base-graphics boxplots of four numeric columns, one plot per column.
boxplot(laptop_data_clean$Price, main = "Boxplot of Price", col = "blue")

# Title spelled like the other plot titles (no underscore).
boxplot(
  laptop_data_clean$Processor_Speed,
  main = "Boxplot of Processor Speed",
  col = "green"
)

# The median line is drawn in the foreground colour (black by default) and
# would be invisible on a black box, so draw it in white instead.
boxplot(
  laptop_data_clean$Screen_Size,
  main = "Boxplot of Screen Size",
  col = "black",
  medcol = "white"
)

boxplot(laptop_data_clean$Weight, main = "Boxplot of Weight", col = "pink")

library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Horizontal bar chart of the mean Price per Brand, bars ordered by that mean.
laptop_data_clean %>%
  group_by(Brand) %>%
  summarise(avg_price = mean(Price)) %>%
  ggplot(mapping = aes(x = reorder(Brand, avg_price), y = avg_price)) +
  geom_col(fill = "coral") +
  coord_flip() +
  labs(
    title = "Average Laptop Price by Brand",
    x = "Brand",
    y = "Average Price"
  )

# Single ggplot boxplot of the Processor_Speed column.
ggplot(data = laptop_data_clean, mapping = aes(y = Processor_Speed)) +
  geom_boxplot(fill = "lightgreen") +
  labs(
    title = "Boxplot of Processor Speed",
    y = "Processor Speed (GHz)"
  )

# Price against Processor_Speed, one point per row, coloured by Brand.
ggplot(
  data = laptop_data_clean,
  mapping = aes(x = Processor_Speed, y = Price, colour = Brand)
) +
  geom_point(alpha = 0.7) +
  labs(
    title = "Scatter Plot: Price vs Processor Speed by Brand",
    x = "Processor Speed (GHz)",
    y = "Price"
  ) +
  theme_minimal()

# Price against Processor_Speed, with points shaded by RAM_Size on a
# blue (low) to red (high) gradient.
ggplot(
  laptop_data_clean,
  aes(x = Processor_Speed, y = Price, color = RAM_Size)
) +
  # alpha is an opacity and must lie in [0, 1]; the previous value of 2.5 was
  # out of range, so use the same 0.7 as the sibling scatter plots.
  geom_point(alpha = 0.7) +
  scale_color_gradient(low = "blue", high = "red") +
  ggtitle("Scatter Plot: Price vs Processor Speed by RAM Size") +
  xlab("Processor Speed (GHz)") + ylab("Price") +
  theme_minimal()

# Price against Processor_Speed, with points shaded by Storage_Capacity on a
# black (low) to purple (high) gradient.
ggplot(
  data = laptop_data_clean,
  mapping = aes(x = Processor_Speed, y = Price, colour = Storage_Capacity)
) +
  geom_point(alpha = 0.7) +
  scale_colour_gradient(low = "black", high = "purple") +
  labs(
    title = "Scatter Plot: Price vs Processor Speed by Storage Capacity",
    x = "Processor Speed (GHz)",
    y = "Price"
  ) +
  theme_minimal()

library(ggplot2)
library(dplyr)
# Rows per brand, each brand's share in percent, and a "Brand (xx.x%)" label.
brand_counts <- laptop_data_clean %>%
  count(Brand) %>%
  mutate(
    percent = n / sum(n) * 100,
    label = paste0(Brand, " (", round(percent, 1), "%)")
  )

# A pie chart is a single stacked bar wrapped onto polar coordinates.
ggplot(data = brand_counts, mapping = aes(x = "", y = percent, fill = Brand)) +
  # width = 1 lets the single bar fill the whole circle
  geom_col(width = 1, colour = "red") +
  # map the stacked percentages to the angle
  coord_polar(theta = "y") +
  geom_text(
    mapping = aes(label = label),
    size = 2.5,
    position = position_stack(vjust = 0.5)
  ) +
  ggtitle("Pie Chart of Laptop Brands") +
  theme_void() +
  theme(legend.position = "none")

# Screen every predictor of Price with Boruta.
# The package must already be installed: run install.packages("Boruta") once
# from the console if it is missing. The script used to call install.packages()
# after the package was already attached, which only produced an
# "in use and will not be installed" warning, and then re-attached it twice.
library(Boruta)

# Seed the RNG right before the run so repeated runs can be compared.
set.seed(123)
boruta_result <- Boruta(Price ~ ., data = laptop_data_clean)
boruta_result
## Boruta performed 99 iterations in 35.41617 secs.
## 2 attributes confirmed important: RAM_Size, Storage_Capacity;
## 3 attributes confirmed unimportant: Brand, Screen_Size, Weight;
## 1 tentative attributes left: Processor_Speed;
# Boruta importance plot over the wide y-range, with rotated and slightly
# shrunken axis labels.
plot(boruta_result, ylim = c(-20, 300), las = 2, cex.axis = 0.8)

# Same plot again, restricted to a much narrower y-range.
plot(boruta_result, ylim = c(-10, 20), las = 2, cex.axis = 0.8)

# Fit a linear model of Price on the three predictors Boruta did not reject
# (RAM_Size and Storage_Capacity confirmed, Processor_Speed tentative).
# The formula is kept inline because summary() echoes the call verbatim.
model <- lm(Price ~ Storage_Capacity + RAM_Size + Processor_Speed, data = laptop_data_clean)
# Print residual quantiles, the coefficient table, R-squared and the F-test.
summary(model)
##
## Call:
## lm(formula = Price ~ Storage_Capacity + RAM_Size + Processor_Speed,
## data = laptop_data_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -648.9 -129.2 -1.3 127.0 836.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 753.80770 29.05806 25.94 <2e-16 ***
## Storage_Capacity 29.97421 0.02009 1491.83 <2e-16 ***
## RAM_Size 50.32853 0.57193 88.00 <2e-16 ***
## Processor_Speed 199.25892 8.60771 23.15 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 198.6 on 996 degrees of freedom
## Multiple R-squared: 0.9996, Adjusted R-squared: 0.9996
## F-statistic: 7.467e+05 on 3 and 996 DF, p-value: < 2.2e-16
# Confidence intervals for each coefficient at the default level
# (2.5 % and 97.5 % bounds, i.e. 95 %).
confint(model)
## 2.5 % 97.5 %
## (Intercept) 696.78565 810.82975
## Storage_Capacity 29.93478 30.01364
## RAM_Size 49.20621 51.45085
## Processor_Speed 182.36758 216.15025
# Analysis-of-variance table for the fitted model. For a single lm fit the
# terms are assessed sequentially in formula order, so reordering the formula
# would change the per-term sums of squares.
anova(model)
## Analysis of Variance Table
##
## Response: Price
## Df Sum Sq Mean Sq F value Pr(>F)
## Storage_Capacity 1 8.8016e+10 8.8016e+10 2231674.58 < 2.2e-16 ***
## RAM_Size 1 3.0903e+08 3.0903e+08 7835.53 < 2.2e-16 ***
## Processor_Speed 1 2.1134e+07 2.1134e+07 535.87 < 2.2e-16 ***
## Residuals 996 3.9282e+07 3.9440e+04
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1