# Load the laptop price data set and preview its first rows.
# NOTE(review): the absolute path below is machine-specific; the script only
# runs where the file exists at exactly this location.
laptop_data <- read.csv("/Users/Lenovo/Downloads/Laptop_price.csv")
head(laptop_data)
##    Brand Processor_Speed RAM_Size Storage_Capacity Screen_Size   Weight
## 1   Asus        3.830296       16              512    11.18515 2.641094
## 2   Acer        2.912833        4             1000    11.31137 3.260012
## 3 Lenovo        3.241627        4              256    11.85302 2.029061
## 4   Acer        3.806248       16              512    12.28036 4.573865
## 5   Acer        3.268097       32             1000    14.99088 4.193472
## 6     HP        1.881348       16              256    11.94396 4.840268
##       Price
## 1 17395.093
## 2 31607.606
## 3  9291.024
## 4 17436.728
## 5 32917.991
## 6  9543.720
# Count the missing values in each column of the imported data.
colSums(x = is.na(laptop_data))
##            Brand  Processor_Speed         RAM_Size Storage_Capacity 
##                0                0                0                0 
##      Screen_Size           Weight            Price 
##                0                0                0
# Keep only the rows without missing values, then preview the result.
laptop_data_clean <- na.omit(object = laptop_data)
head(x = laptop_data_clean, n = 6L)
##    Brand Processor_Speed RAM_Size Storage_Capacity Screen_Size   Weight
## 1   Asus        3.830296       16              512    11.18515 2.641094
## 2   Acer        2.912833        4             1000    11.31137 3.260012
## 3 Lenovo        3.241627        4              256    11.85302 2.029061
## 4   Acer        3.806248       16              512    12.28036 4.573865
## 5   Acer        3.268097       32             1000    14.99088 4.193472
## 6     HP        1.881348       16              256    11.94396 4.840268
##       Price
## 1 17395.093
## 2 31607.606
## 3  9291.024
## 4 17436.728
## 5 32917.991
## 6  9543.720
# Show the column types and leading values of the imported data frame.
# NOTE(review): this inspects `laptop_data`, while the rest of the script
# works on `laptop_data_clean` -- confirm which frame was intended.
str(object = laptop_data)
## 'data.frame':    1000 obs. of  7 variables:
##  $ Brand           : chr  "Asus" "Acer" "Lenovo" "Acer" ...
##  $ Processor_Speed : num  3.83 2.91 3.24 3.81 3.27 ...
##  $ RAM_Size        : int  16 4 4 16 32 16 32 4 4 32 ...
##  $ Storage_Capacity: int  512 1000 256 512 1000 256 256 256 512 512 ...
##  $ Screen_Size     : num  11.2 11.3 11.9 12.3 15 ...
##  $ Weight          : num  2.64 3.26 2.03 4.57 4.19 ...
##  $ Price           : num  17395 31608 9291 17437 32918 ...
# Base-graphics boxplots of the numeric columns, one plot per column.
boxplot(laptop_data_clean$Price, main = "Boxplot of Price", col = "blue")

boxplot(
  laptop_data_clean$Processor_Speed,
  main = "Boxplot of Processor_Speed",
  col = "green"
)

# The box is filled black, so the median line is drawn in white; in the
# default foreground colour it would not be visible against the fill.
boxplot(
  laptop_data_clean$Screen_Size,
  main = "Boxplot of Screen Size",
  col = "black",
  medcol = "white"
)

boxplot(laptop_data_clean$Weight, main = "Boxplot of Weight", col = "pink")

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Horizontal bar chart of the mean price per brand, bars ordered by that mean.
laptop_data_clean %>%
  group_by(Brand) %>%
  summarise(avg_price = mean(Price)) %>%
  ggplot(mapping = aes(x = reorder(Brand, avg_price), y = avg_price)) +
  geom_col(fill = "coral") +
  coord_flip() +
  labs(
    title = "Average Laptop Price by Brand",
    x = "Brand",
    y = "Average Price"
  )

# Boxplot of processor speed over all rows of the cleaned data.
ggplot(data = laptop_data_clean, mapping = aes(y = Processor_Speed)) +
  geom_boxplot(fill = "lightgreen") +
  labs(title = "Boxplot of Processor Speed", y = "Processor Speed (GHz)")

# Price against processor speed, points coloured by brand.
ggplot(
  data = laptop_data_clean,
  mapping = aes(x = Processor_Speed, y = Price, color = Brand)
) +
  geom_point(alpha = 0.7) +
  labs(
    title = "Scatter Plot: Price vs Processor Speed by Brand",
    x = "Processor Speed (GHz)",
    y = "Price"
  ) +
  theme_minimal()

# Price against processor speed, points coloured by RAM size on a
# blue-to-red gradient.
ggplot(
  laptop_data_clean,
  aes(x = Processor_Speed, y = Price, color = RAM_Size)
) +
  # alpha is an opacity in [0, 1]; the previous value of 2.5 was out of range.
  # 0.7 matches the other scatter plots in this script.
  geom_point(alpha = 0.7) +
  scale_color_gradient(low = "blue", high = "red") +
  ggtitle("Scatter Plot: Price vs Processor Speed by RAM Size") +
  xlab("Processor Speed (GHz)") + ylab("Price") +
  theme_minimal()

# Price against processor speed, points coloured by storage capacity on a
# black-to-purple gradient.
ggplot(
  data = laptop_data_clean,
  mapping = aes(x = Processor_Speed, y = Price, color = Storage_Capacity)
) +
  geom_point(alpha = 0.7) +
  scale_color_gradient(low = "black", high = "purple") +
  labs(
    title = "Scatter Plot: Price vs Processor Speed by Storage Capacity",
    x = "Processor Speed (GHz)",
    y = "Price"
  ) +
  theme_minimal()

library(ggplot2)
library(dplyr)
# Row count and percentage share per brand, plus a "Brand (xx.x%)" label.
brand_counts <- laptop_data_clean %>%
  count(Brand) %>%
  mutate(
    percent = n / sum(n) * 100,
    label = paste0(Brand, " (", round(percent, 1), "%)")
  )

# A single stacked bar of width 1 wrapped onto polar coordinates gives a pie;
# the slice labels replace the legend.
ggplot(
  data = brand_counts,
  mapping = aes(x = "", y = percent, fill = Brand)
) +
  geom_col(width = 1, color = "red") +
  coord_polar(theta = "y") +
  geom_text(
    mapping = aes(label = label),
    position = position_stack(vjust = 0.5),
    size = 2.5
  ) +
  ggtitle("Pie Chart of Laptop Brands") +
  theme_void() +
  theme(legend.position = "none")

# Boruta provides the feature-selection routine used below. If it is missing,
# install it once outside this script with install.packages("Boruta");
# installing from inside the analysis only produced an "in use" warning.
library(Boruta)
# Seed the random number generator, run Boruta with Price as the response and
# every other column as a candidate predictor, then print the result.
set.seed(seed = 123)
boruta_result <- Boruta(Price ~ ., data = laptop_data_clean)
print(boruta_result)
## Boruta performed 99 iterations in 35.41617 secs.
##  2 attributes confirmed important: RAM_Size, Storage_Capacity;
##  3 attributes confirmed unimportant: Brand, Screen_Size, Weight;
##  1 tentative attributes left: Processor_Speed;
# Plot the Boruta result twice: first on a wide y-range, then on a narrow one.
plot(
  boruta_result,
  las = 2,
  cex.axis = 0.8,
  ylim = c(-20, 300)
)

plot(
  boruta_result,
  las = 2,
  cex.axis = 0.8,
  ylim = c(-10, 20)
)

# Linear regression of Price on storage capacity, RAM size and processor
# speed, followed by the fit summary.
model <- lm(
  formula = Price ~ Storage_Capacity + RAM_Size + Processor_Speed,
  data = laptop_data_clean
)
summary(object = model)
## 
## Call:
## lm(formula = Price ~ Storage_Capacity + RAM_Size + Processor_Speed, 
##     data = laptop_data_clean)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -648.9 -129.2   -1.3  127.0  836.0 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      753.80770   29.05806   25.94   <2e-16 ***
## Storage_Capacity  29.97421    0.02009 1491.83   <2e-16 ***
## RAM_Size          50.32853    0.57193   88.00   <2e-16 ***
## Processor_Speed  199.25892    8.60771   23.15   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 198.6 on 996 degrees of freedom
## Multiple R-squared:  0.9996, Adjusted R-squared:  0.9996 
## F-statistic: 7.467e+05 on 3 and 996 DF,  p-value: < 2.2e-16
# 95% confidence intervals for the regression coefficients.
confint(object = model, level = 0.95)
##                      2.5 %    97.5 %
## (Intercept)      696.78565 810.82975
## Storage_Capacity  29.93478  30.01364
## RAM_Size          49.20621  51.45085
## Processor_Speed  182.36758 216.15025
# Analysis-of-variance table for the fitted regression, terms in formula order.
anova(object = model)
## Analysis of Variance Table
## 
## Response: Price
##                   Df     Sum Sq    Mean Sq    F value    Pr(>F)    
## Storage_Capacity   1 8.8016e+10 8.8016e+10 2231674.58 < 2.2e-16 ***
## RAM_Size           1 3.0903e+08 3.0903e+08    7835.53 < 2.2e-16 ***
## Processor_Speed    1 2.1134e+07 2.1134e+07     535.87 < 2.2e-16 ***
## Residuals        996 3.9282e+07 3.9440e+04                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1