library(readr)
# Load the Flipkart mobile listings into a tibble.
# NOTE(review): the path below is absolute and machine-specific; adjust it
# before running the script on another machine.
Flipkart_Mobiles <- read_csv("C:/Users/aby48/Desktop/Flipkart_Mobiles.csv")
## Rows: 3114 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Brand, Model, Color, Memory, Storage
## dbl (3): Rating, Selling Price, Original Price
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a GUI data viewer, so it is only useful in an interactive
# session; skip it when the script is knitted or run non-interactively.
if (interactive()) {
  View(Flipkart_Mobiles)
}
# Per-column summaries. Memory and Storage are still character at this point,
# so only their length/class is reported; the numeric columns get quantiles.
summary(Flipkart_Mobiles[["Memory"]])
## Length Class Mode
## 3114 character character
summary(Flipkart_Mobiles[["Storage"]])
## Length Class Mode
## 3114 character character
summary(Flipkart_Mobiles[["Rating"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2.300 4.100 4.300 4.243 4.400 5.000 144
summary(Flipkart_Mobiles[["Selling Price"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1000 9990 15000 26437 28999 179900
summary(Flipkart_Mobiles[["Original Price"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1000 10030 16890 28333 31500 189999
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Scatter of Rating per Brand.
# Only rows missing one of the two plotted columns are dropped: a blanket
# na.omit() would also discard rows whose only NA is in an unrelated column.
# No group_by() is needed because ggplot() does not use dplyr grouping.
Flipkart_Mobiles %>%
  drop_na(Brand, Rating) %>%
  ggplot(aes(x = Brand, y = Rating)) +
  geom_point(size = 4)

#' Parse free-text memory sizes into numeric gigabytes.
#'
#' @param x Character (or numeric) vector such as "4 GB", "1.5GB" or "512 MB".
#'   A bare number with no unit is taken to be in GB, which matches what the
#'   earlier GB-only parse produced for such values.
#' @return Numeric vector the same length as `x`. Entries that are missing or
#'   do not look like "<number> <MB|GB|TB>" are `NA`.
parse_memory_gb <- function(x) {
  text <- toupper(trimws(as.character(x)))
  # grepl() returns FALSE for NA input, so missing values simply stay NA.
  is_size <- grepl("^[0-9]*\\.?[0-9]+[[:space:]]*(MB|GB|TB)?$", text)
  sizes <- text[is_size]

  unit <- sub("^[0-9.]+[[:space:]]*", "", sizes)
  unit[unit == ""] <- "GB"
  gb_per_unit <- c(MB = 1 / 1024, GB = 1, TB = 1024)

  out <- rep(NA_real_, length(text))
  # Only validated strings reach as.numeric(), so no coercion warning is raised.
  out[is_size] <-
    as.numeric(sub("[[:space:]]*[A-Z]*$", "", sizes)) * unname(gb_per_unit[unit])
  out
}

# Convert Memory to numeric GB. The previous approach stripped only "GB" and
# coerced the rest, which warned and left 207 NAs; presumably many of those
# were sizes given in another unit (e.g. MB) -- TODO confirm against the data.
Flipkart_Mobiles$Memory <- parse_memory_gb(Flipkart_Mobiles$Memory)

# Number of Memory values that are missing or could not be parsed.
sum(is.na(Flipkart_Mobiles$Memory))
hist(Flipkart_Mobiles$Memory, main = "Memory Distribution", xlab = "Memory (GB)")

# Base-graphics overview: price spread, price vs. rating, brand frequencies,
# and original vs. selling price.
with(
  Flipkart_Mobiles,
  boxplot(`Selling Price`, main = "Selling Price Box Plot")
)

with(
  Flipkart_Mobiles,
  plot(
    `Selling Price`, Rating,
    main = "Scatter Plot of Selling Price vs. Rating",
    xlab = "Selling Price",
    ylab = "Rating"
  )
)

# Number of listings per brand.
brand_counts <- table(Flipkart_Mobiles[["Brand"]])
barplot(
  brand_counts,
  main = "Brand Distribution",
  xlab = "Brand",
  ylab = "Frequency"
)

with(
  Flipkart_Mobiles,
  plot(
    `Original Price`, `Selling Price`,
    main = "Original Price vs. Selling Price",
    xlab = "Original Price",
    ylab = "Selling Price"
  )
)

# Brand x Model cross-tabulation of listing counts.
contingency_table <- table(
  Flipkart_Mobiles[["Brand"]],
  Flipkart_Mobiles[["Model"]]
)
library(ggplot2)
# Listing count per brand, default styling.
ggplot(Flipkart_Mobiles, aes(Brand)) +
  geom_bar() +
  ggtitle("Distribution of Brands")

library(ggplot2)
# Same counts, with one fill colour per brand and the minimal theme.
ggplot(Flipkart_Mobiles, aes(Brand, fill = Brand)) +
  geom_bar() +
  ggtitle("Brand Distribution") +
  theme_minimal()

library(ggplot2)
library(dplyr)
# Overlaid rating densities, one semi-transparent curve per brand.
ggplot(Flipkart_Mobiles, aes(Rating, fill = Brand)) +
  geom_density(alpha = 0.7) +
  ggtitle("Density Plot of Ratings by Brand") +
  theme_minimal()
## Warning: Removed 144 rows containing non-finite values (`stat_density()`).

# Selling-price distribution per brand; x labels are tilted 45 degrees and
# right-aligned.
ggplot(Flipkart_Mobiles, aes(Brand, `Selling Price`, fill = Brand)) +
  geom_boxplot() +
  ggtitle("Box Plot of Selling Price by Brand") +
  theme_minimal() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

library(ggplot2)
# Selling price against the numeric Memory column; rows with a missing value
# are dropped by geom_point() with a warning.
ggplot(Flipkart_Mobiles, aes(Memory, `Selling Price`)) +
  geom_point() +
  ggtitle("selling price based on memory") +
  xlab("memory") +
  ylab("selling price")
## Warning: Removed 207 rows containing missing values (`geom_point()`).
