#Load Necessary Libraries
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'ggplot2' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ purrr 1.2.1
## ✔ ggplot2 4.0.2 ✔ stringr 1.6.0
## ✔ lubridate 1.9.5 ✔ tibble 3.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'GGally' was built under R version 4.5.3
## Warning: package 'corrplot' was built under R version 4.5.3
## corrplot 0.95 loaded
## Warning: package 'ggpubr' was built under R version 4.5.3
## Warning: package 'janitor' was built under R version 4.5.3
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
## 'data.frame': 10000 obs. of 15 variables:
## $ State : chr "Karnataka" "Rajasthan" "Madhya Pradesh" "Karnataka" ...
## $ Avg.Daily.Distance..km.: num 68.8 23.8 27.7 62.9 10.6 ...
## $ Brand : chr "Royal Enfield" "Bajaj" "KTM" "Kawasaki" ...
## $ Model : chr "Hunter 350" "Dominar 400" "125 Duke" "Ninja 300" ...
## $ Price..INR. : int 252816 131100 201016 132482 231796 60839 349740 193781 145538 227055 ...
## $ Year.of.Manufacture : int 2021 2020 2020 2021 2019 2019 2022 2019 2022 2024 ...
## $ Engine.Capacity..cc. : int 672 769 216 556 298 386 970 375 330 626 ...
## $ Fuel.Type : chr "Electric" "Hybrid" "Hybrid" "Petrol" ...
## $ Mileage..km.l. : num 78.4 90 71.5 51.7 54.7 ...
## $ Owner.Type : chr "Second" "Third" "Second" "Third" ...
## $ Registration.Year : int 2024 2023 2023 2021 2019 2019 2022 2022 2023 2024 ...
## $ Insurance.Status : chr "Active" "Active" "Active" "Active" ...
## $ Seller.Type : chr "Individual" "Individual" "Dealer" "Dealer" ...
## $ Resale.Price..INR. : num 149934 66960 141523 56057 132538 ...
## $ City.Tier : chr "Tier 3" "Tier 3" "Tier 3" "Tier 1" ...
## State Avg.Daily.Distance..km. Brand
## 0 0 0
## Model Price..INR. Year.of.Manufacture
## 0 0 0
## Engine.Capacity..cc. Fuel.Type Mileage..km.l.
## 0 0 0
## Owner.Type Registration.Year Insurance.Status
## 0 0 0
## Seller.Type Resale.Price..INR. City.Tier
## 0 0 0
state_price <- bikes %>%
group_by(state) %>%
summarise(avg_price = mean(price_inr, na.rm = TRUE)) %>%
arrange(desc(avg_price))
state_price## # A tibble: 10 × 2
## state avg_price
## <chr> <dbl>
## 1 Delhi 228110.
## 2 Uttar Pradesh 227511.
## 3 Tamil Nadu 226715.
## 4 Punjab 226566.
## 5 Gujarat 226441.
## 6 Karnataka 225220.
## 7 West Bengal 221925.
## 8 Rajasthan 220820.
## 9 Maharashtra 220032.
## 10 Madhya Pradesh 219937.
top_5_brands <- bikes %>%
group_by(brand) %>%
summarise(avg_resale_price = mean(resale_price_inr, na.rm = TRUE)) %>%
arrange(desc(avg_resale_price)) %>%
slice(1:5)
print(top_5_brands)## # A tibble: 5 × 2
## brand avg_resale_price
## <chr> <dbl>
## 1 Bajaj 136662.
## 2 Yamaha 135535.
## 3 Hero 134885.
## 4 Kawasaki 134373.
## 5 TVS 133497.
top_10_mileage_models <- bikes %>%
group_by(model) %>%
summarise(avg_mileage = mean(mileage_km_l, na.rm = TRUE)) %>%
slice_max(avg_mileage, n = 10)
top_10_mileage_models## # A tibble: 10 × 2
## model avg_mileage
## <chr> <dbl>
## 1 NTorq 125 71.2
## 2 Unicorn 70.6
## 3 Versys 650 69.2
## 4 Splendor Plus 69.1
## 5 Hunter 350 69.0
## 6 Xtreme 160R 68.8
## 7 Pulsar 150 68.8
## 8 Vulcan S 68.7
## 9 Glamour 68.4
## 10 RC 390 68.4
top_10_engine_brands <- bikes %>%
group_by(brand) %>%
summarise(avg_engine_capacity = mean(engine_capacity_cc, na.rm = TRUE)) %>%
arrange(desc(avg_engine_capacity)) %>%
slice(1:10)
top_10_engine_brands## # A tibble: 8 × 2
## brand avg_engine_capacity
## <chr> <dbl>
## 1 Kawasaki 559.
## 2 Yamaha 558.
## 3 Royal Enfield 555.
## 4 Hero 554.
## 5 Honda 550.
## 6 TVS 549.
## 7 Bajaj 548.
## 8 KTM 548.
most_common_year <- bikes %>%
count(registration_year) %>%
arrange(desc(n)) %>%
slice(1)
most_common_year## registration_year n
## 1 2024 2296
top_state_second_owner <- bikes %>%
filter(owner_type == "Second") %>%
group_by(state) %>%
summarise(count = n()) %>%
arrange(desc(count))
top_state_second_owner[1, ]## # A tibble: 1 × 2
## state count
## <chr> <int>
## 1 Maharashtra 368
fuel_mileage <- bikes %>%
group_by(fuel_type) %>%
summarise(avg_mileage = mean(mileage_km_l, na.rm = TRUE)) %>%
arrange(desc(avg_mileage))
fuel_mileage[1, ]## # A tibble: 1 × 2
## fuel_type avg_mileage
## <chr> <dbl>
## 1 Electric 80.1
model_mileage_rank <- bikes %>%
group_by(model) %>%
summarise(avg_mileage = mean(mileage_km_l, na.rm = TRUE)) %>%
mutate(rank = dense_rank(desc(avg_mileage))) %>%
arrange(rank)
model_mileage_rank## # A tibble: 40 × 3
## model avg_mileage rank
## <chr> <dbl> <int>
## 1 NTorq 125 71.2 1
## 2 Unicorn 70.6 2
## 3 Versys 650 69.2 3
## 4 Splendor Plus 69.1 4
## 5 Hunter 350 69.0 5
## 6 Xtreme 160R 68.8 6
## 7 Pulsar 150 68.8 7
## 8 Vulcan S 68.7 8
## 9 Glamour 68.4 9
## 10 RC 390 68.4 10
## # ℹ 30 more rows
top_5_low_depreciation <- bikes %>%
mutate(depreciation = price_inr - resale_price_inr) %>%
group_by(brand) %>%
summarise(avg_depreciation = mean(depreciation, na.rm = TRUE)) %>%
slice_min(avg_depreciation, n = 5)
top_5_low_depreciation## # A tibble: 5 × 2
## brand avg_depreciation
## <chr> <dbl>
## 1 Kawasaki 88790.
## 2 TVS 88972.
## 3 Honda 89979.
## 4 Royal Enfield 90295.
## 5 Yamaha 90387.
bikes <- bikes %>%
mutate(depreciation_pct = ((price_inr - resale_price_inr) / price_inr) * 100)
head(bikes)## state avg_daily_distance_km brand model price_inr
## 1 Karnataka 68.84 Royal Enfield Hunter 350 252816
## 2 Rajasthan 23.80 Bajaj Dominar 400 131100
## 3 Madhya Pradesh 27.67 KTM 125 Duke 201016
## 4 Karnataka 62.85 Kawasaki Ninja 300 132482
## 5 Madhya Pradesh 10.62 Yamaha FZ V3 231796
## 6 Maharashtra 47.71 Hero Splendor Plus 60839
## year_of_manufacture engine_capacity_cc fuel_type mileage_km_l owner_type
## 1 2021 672 Electric 78.41 Second
## 2 2020 769 Hybrid 89.98 Third
## 3 2020 216 Hybrid 71.46 Second
## 4 2021 556 Petrol 51.73 Third
## 5 2019 298 Petrol 54.72 Third
## 6 2019 386 Electric 92.08 First
## registration_year insurance_status seller_type resale_price_inr city_tier
## 1 2024 Active Individual 149934.18 Tier 3
## 2 2023 Active Individual 66960.30 Tier 3
## 3 2023 Active Dealer 141522.64 Tier 3
## 4 2021 Active Dealer 56057.22 Tier 1
## 5 2019 Not Available Individual 132538.36 Tier 3
## 6 2019 Not Available Dealer 47466.45 Tier 3
## depreciation_pct
## 1 40.69435
## 2 48.92426
## 3 29.59633
## 4 57.68692
## 5 42.82112
## 6 21.98023
ggplot(bikes, aes(x = price_inr)) +
geom_histogram(bins = 30, fill = "blue", color = "black") +
ggtitle("Distribution of Bike Prices") +
xlab("Price (INR)") +
ylab("Count")## NULL
state_price <- bikes %>%
group_by(state) %>%
summarise(avg_price = mean(price_inr, na.rm = TRUE)) %>%
arrange(desc(avg_price))
ggplot(state_price, aes(x = reorder(state, avg_price), y = avg_price)) +
geom_bar(stat = "identity", fill = "darkgreen") +
coord_flip() +
ggtitle("Average Price by State") +
xlab("State") +
ylab("Avg Price")## NULL