# Setup ----
# ggplot2 provides every plotting function used in the sections below.
library(ggplot2)

# CSV file holding the bike sales records; the path is resolved relative to
# the current working directory.
filename <- "bike_sales_india.csv"

# Read the data, keeping text columns as character vectors rather than
# factors. read.csv() keeps its default check.names = TRUE, so headers that
# are not syntactic R names are rewritten with dots; the plotting code below
# relies on names of that form (e.g. Price..INR., Mileage..km.l.).
df1 <- read.csv(filename, stringsAsFactors = FALSE)

## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

## Including Plots

You can also embed plots, for example:

# 1. Most popular bike brands in India (bar graph) ----
# Number of rows recorded for each brand; useful for cross-checking the bar
# heights drawn below.
brand_total <- table(df1[["Brand"]])
# print(brand_total)

# One bar per brand with its row count printed just above the bar. The x-axis
# labels are rotated 45 degrees and right-aligned so the brand names do not
# run into each other.
brand_bar_graph <- ggplot(data = df1, mapping = aes(x = Brand))
brand_bar_graph <- brand_bar_graph +
  geom_bar(fill = "aquamarine") +
  geom_text(
    mapping = aes(label = after_stat(count)),
    stat = "count",
    vjust = -0.5
  ) +
  labs(
    title = "Most Popular Bike Brands",
    x = "Brand",
    y = "Count"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(brand_bar_graph)

#install.packages("rmarkdown")
#install.packages('knitr')

# 2. Price vs. resale value for Kawasaki and Yamaha (scatter plot) ----
# Keep only the rows of the two brands being compared. %in% never yields NA,
# so the logical mask can be used for row indexing directly.
is_kawasaki_or_yamaha <- df1$Brand %in% c("Kawasaki", "Yamaha")
kawasaki_yamaha_1 <- df1[is_kawasaki_or_yamaha, , drop = FALSE]

# Purchase price on x, resale price on y, coloured by brand. Points are
# semi-transparent so overlapping observations remain visible, and both axes
# use comma-formatted labels.
price_resale_plot <- ggplot(
  data = kawasaki_yamaha_1,
  mapping = aes(x = Price..INR., y = Resale.Price..INR., color = Brand)
)
price_resale_plot <- price_resale_plot +
  geom_point(alpha = 0.5) +
  labs(
    title = "Bike Price vs. Resale Value (Kawasaki & Yamaha)",
    x = "Price (INR)",
    y = "Resale Price (INR)"
  ) +
  scale_x_continuous(labels = scales::comma) +
  scale_y_continuous(labels = scales::comma)
print(price_resale_plot)

# 3. Fuel type distribution for Kawasaki and Yamaha (grouped bar chart) ----
# Rebuild the two-brand subset here so this section does not depend on any
# earlier section having been run.
is_kawasaki_or_yamaha <- df1$Brand %in% c("Kawasaki", "Yamaha")
kawasaki_yamaha_1 <- df1[is_kawasaki_or_yamaha, , drop = FALSE]

# Rows per fuel type across both brands combined.
fuel_counts <- table(kawasaki_yamaha_1[["Fuel.Type"]])

# Side-by-side bars per fuel type, one bar per brand. The count labels are
# dodged by the same 0.9 width as the bars so each label sits over its own bar.
fuel_plot <- ggplot(
  data = kawasaki_yamaha_1,
  mapping = aes(x = Fuel.Type, fill = Brand)
)
fuel_plot <- fuel_plot +
  geom_bar(position = "dodge") +
  geom_text(
    mapping = aes(label = after_stat(count)),
    stat = "count",
    position = position_dodge(width = 0.9),
    vjust = -0.5
  ) +
  labs(
    title = "Fuel Type Distribution (Kawasaki & Yamaha)",
    x = "Fuel Type",
    y = "Count"
  )
print(fuel_plot)

# 4. Engine capacity vs. mileage (2-D binned heatmap) ----
# Observations are counted in rectangular bins 50 units wide on the engine
# capacity axis and 5 units tall on the mileage axis; the fill runs from light
# blue (few rows) to dark blue (many rows).
engine_mileage_heatmap <- ggplot(
  data = df1,
  mapping = aes(x = Engine.Capacity..cc., y = Mileage..km.l.)
)
engine_mileage_heatmap <- engine_mileage_heatmap +
  geom_bin2d(binwidth = c(50, 5)) +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  labs(
    title = "Engine Capacity vs. Mileage ",
    x = "Engine Capacity (cc)",
    y = "Mileage (km/l)",
    fill = "Count"
  )
print(engine_mileage_heatmap)

# 5. Sales by city tier for Kawasaki (pie chart) ----
# Restrict the data to Kawasaki rows.
kawasaki_sales <- df1[df1$Brand %in% "Kawasaki", , drop = FALSE]

# Tabulate rows per city tier and turn the table into a two-column data frame
# (tier, count) that ggplot can consume.
city_tier_counts <- setNames(
  as.data.frame(table(kawasaki_sales[["City.Tier"]])),
  c("City.Tier", "Count")
)

# A single stacked bar of full width, wrapped into polar coordinates along the
# y axis, gives the pie. Each slice is labelled with its count at the middle
# of the slice, and theme_void() strips axes and background.
city_tier_pie <- ggplot(
  data = city_tier_counts,
  mapping = aes(x = "", y = Count, fill = City.Tier)
)
city_tier_pie <- city_tier_pie +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  geom_text(
    mapping = aes(label = Count),
    position = position_stack(vjust = 0.5)
  ) +
  labs(
    title = "Bike Sales by City Tier (Kawasaki)",
    x = NULL,
    y = NULL
  ) +
  theme_void()
print(city_tier_pie)

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.