# Project on Flipkart Analysis

library(readr)

# Load the Flipkart mobiles data set into `Flipkart_Mobiles`.
# The default location is specific to the original author's machine; set the
# FLIPKART_MOBILES_CSV environment variable to read the file from elsewhere.
flipkart_csv_path <- Sys.getenv("FLIPKART_MOBILES_CSV", unset = "")
if (!nzchar(flipkart_csv_path)) {
  flipkart_csv_path <- "C:/Users/aby48/Desktop/Flipkart_Mobiles.csv"
}
Flipkart_Mobiles <- read_csv(flipkart_csv_path)

## Rows: 3114 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Brand, Model, Color, Memory, Storage
## dbl (3): Rating, Selling Price, Original Price
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a GUI data viewer, so it is only called in an interactive
# session and is skipped when the script is knitted or run in batch mode.
if (interactive()) {
  View(Flipkart_Mobiles)
}



# Per-column summaries of the raw data. The `##` lines are the console output
# recorded for each call.

# Memory and Storage are still character columns here, so summary() only
# reports length, class and mode.
summary(Flipkart_Mobiles[["Memory"]])

##    Length     Class      Mode 
##      3114 character character

summary(Flipkart_Mobiles[["Storage"]])

##    Length     Class      Mode 
##      3114 character character

# Numeric columns: five-number summary plus mean (and NA count for Rating).
summary(Flipkart_Mobiles[["Rating"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.300   4.100   4.300   4.243   4.400   5.000     144

summary(Flipkart_Mobiles[["Selling Price"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1000    9990   15000   26437   28999  179900

summary(Flipkart_Mobiles[["Original Price"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1000   10030   16890   28333   31500  189999

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Scatter of every listing's Rating against its Brand.
# Only rows missing one of the two plotted columns are removed; a blanket
# na.omit() would also discard listings whose NA is in an unrelated column.
# No group_by() is needed: grouping a data frame has no effect on ggplot().
Flipkart_Mobiles %>%
  filter(!is.na(Brand), !is.na(Rating)) %>%
  ggplot(aes(x = Brand, y = Rating)) +
  geom_point(size = 4)

# Convert the free-text Memory column to numeric gigabytes.
# Stripping only "GB" and coercing turned every value written any other way
# into NA with a coercion warning (presumably MB-sized memories such as
# "512 MB" -- verify against the data). The amount and the unit are therefore
# parsed separately; a value without a unit is read as GB, as before.
# The conversion is skipped when the column is already numeric, so running
# this section twice does not change the data.
if (!is.numeric(Flipkart_Mobiles$Memory)) {
  memory_text <- trimws(as.character(Flipkart_Mobiles$Memory))
  memory_pattern <- "^([0-9]+\\.?[0-9]*|\\.[0-9]+)[[:space:]]*([KMGT]B)?$"

  # grepl() is FALSE for NA, so missing and unparseable entries both stay NA.
  is_parsed <- grepl(memory_pattern, memory_text, ignore.case = TRUE)

  memory_amount <- as.numeric(
    sub(memory_pattern, "\\1", memory_text[is_parsed], ignore.case = TRUE)
  )
  memory_unit <- toupper(
    sub(memory_pattern, "\\2", memory_text[is_parsed], ignore.case = TRUE)
  )
  memory_unit[memory_unit == ""] <- "GB"

  # Binary multiples: 1 GB = 1024 MB = 1024^2 KB, 1 TB = 1024 GB.
  gb_per_unit <- c(KB = 1 / 1024^2, MB = 1 / 1024, GB = 1, TB = 1024)

  memory_gb <- rep(NA_real_, length(memory_text))
  memory_gb[is_parsed] <- memory_amount * unname(gb_per_unit[memory_unit])
  Flipkart_Mobiles$Memory <- memory_gb
}

# Number of listings whose Memory is still unknown after parsing.
# NOTE: console output pasted elsewhere in this file that mentions 207 missing
# Memory values was recorded with the earlier GB-only conversion.
sum(is.na(Flipkart_Mobiles$Memory))

# Base-graphics overview plots.

# Distribution of the numeric Memory column.
hist(
  Flipkart_Mobiles[["Memory"]],
  main = "Memory Distribution",
  xlab = "Memory (GB)"
)

# Spread of selling prices.
boxplot(
  Flipkart_Mobiles[["Selling Price"]],
  main = "Selling Price Box Plot"
)

# Rating against selling price.
plot(
  x = Flipkart_Mobiles[["Selling Price"]],
  y = Flipkart_Mobiles[["Rating"]],
  main = "Scatter Plot of Selling Price vs. Rating",
  xlab = "Selling Price",
  ylab = "Rating"
)

# Number of listings per brand.
brand_counts <- table(Flipkart_Mobiles[["Brand"]])
barplot(
  brand_counts,
  main = "Brand Distribution",
  xlab = "Brand",
  ylab = "Frequency"
)

# Selling price against original price.
plot(
  x = Flipkart_Mobiles[["Original Price"]],
  y = Flipkart_Mobiles[["Selling Price"]],
  main = "Original Price vs. Selling Price",
  xlab = "Original Price",
  ylab = "Selling Price"
)

# Cross-tabulation of Brand (rows) by Model (columns).
contingency_table <- table(
  Flipkart_Mobiles[["Brand"]],
  Flipkart_Mobiles[["Model"]]
)
library(ggplot2)

# Listings per brand, default styling.
ggplot(Flipkart_Mobiles) +
  geom_bar(mapping = aes(x = Brand)) +
  labs(title = "Distribution of Brands")

library(ggplot2)

# Same counts, one fill colour per brand, on the minimal theme.
ggplot(Flipkart_Mobiles) +
  geom_bar(mapping = aes(x = Brand, fill = Brand)) +
  labs(title = "Brand Distribution") +
  theme_minimal()

library(ggplot2)
library(dplyr)

# Overlaid, semi-transparent density curves of Rating, one per brand.
ggplot(Flipkart_Mobiles) +
  geom_density(mapping = aes(x = Rating, fill = Brand), alpha = 0.7) +
  labs(title = "Density Plot of Ratings by Brand") +
  theme_minimal()

## Warning: Removed 144 rows containing non-finite values (`stat_density()`).

# Selling price per brand as box plots; the brand labels on the x axis are
# rotated 45 degrees.
ggplot(Flipkart_Mobiles) +
  geom_boxplot(
    mapping = aes(x = Brand, y = `Selling Price`, fill = Brand)
  ) +
  labs(title = "Box Plot of Selling Price by Brand") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

library(ggplot2)

# Selling price against the numeric Memory column.
ggplot(
  data = Flipkart_Mobiles,
  mapping = aes(x = Memory, y = `Selling Price`)
) +
  geom_point() +
  labs(
    title = "selling price based on memory",
    x = "memory",
    y = "selling price"
  )

## Warning: Removed 207 rows containing missing values (`geom_point()`).

# Project on Flipkart Analysis
#
# abyanshu
#
# 2023-09-18