---
title: "EDA for SETC Dataset"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    theme: flatly
    social: menu
    source_code: embed
    navbar:
      - { title: "Dataset Description", href: "#dataset-description" }
      - { title: "Univariate Analysis", href: "#univariate-analysis" }
      - { title: "Bivariate Analysis", href: "#bivariate-analysis" }
      - { title: "Multivariate Analysis", href: "#multivariate-analysis" }
---
```{r setup, include=FALSE}
library(flexdashboard)
library(ggplot2)
library(dplyr)
# Read the SETC records from the CSV file (relative path); the chunks
# below all work on this `bus` data frame.
bus <- read.csv(file = "SETC.csv")
```
## Dataset Description {#dataset-description .tabset .active}
### About the Dataset
```{r}
# Compact listing of the data frame's structure.
str(object = bus)
```
### Summary of the dataset
```{r}
# Per-column summary of the data frame.
summary(object = bus)
```
### Head of Dataset
```{r}
# First six rows of the data frame (six is head()'s default, spelled out).
head(x = bus, n = 6L)
```
## Univariate Analysis {#univariate-analysis .tabset}
### Histogram for Distribution of Route Length
```{r}
# Route length distribution, binned in steps of 50 on the x axis.
ggplot(data = bus, mapping = aes(x = Route.Length)) +
  geom_histogram(
    binwidth = 50,
    fill = "skyblue",
    color = "black",
    alpha = 0.7
  ) +
  theme_minimal() +
  labs(
    title = "Histogram of Route Length",
    x = "Route Length (km)",
    y = "Frequency"
  )
```
### Histogram for Distribution of Number of Services
```{r}
# Distribution of the number of services, one bin per unit.
ggplot(data = bus, mapping = aes(x = No.of.Service)) +
  geom_histogram(
    binwidth = 1,
    fill = "purple",
    color = "black",
    alpha = 0.7
  ) +
  theme_minimal() +
  labs(
    title = "Histogram of Number of Services",
    x = "Number of Services",
    y = "Frequency"
  )
```
## Bivariate Analysis {#bivariate-analysis .tabset}
### Box Plot for Distribution of Route Length by Type of Service
```{r}
# Route length compared across service types, one box per Type value.
ggplot(data = bus, mapping = aes(x = Type, y = Route.Length)) +
  geom_boxplot(
    fill = "lightgreen",
    color = "darkgreen"
  ) +
  theme_minimal() +
  labs(
    title = "Box Plot of Route Length by Type of Service",
    x = "Type of Service",
    y = "Route Length (km)"
  )
```
### Box Plot for Number of Services by Type of Service
```{r}
# Number of services compared across service types, one box per Type value.
ggplot(data = bus, mapping = aes(x = Type, y = No.of.Service)) +
  geom_boxplot(
    fill = "orange",
    color = "darkorange"
  ) +
  theme_minimal() +
  labs(
    title = "Box Plot of Number of Services by Type of Service",
    x = "Type of Service",
    y = "Number of Services"
  )
```
## Multivariate Analysis {#multivariate-analysis .tabset}
### Scatter Plot for Route Length vs No. of Services
```{r}
# Scatter plot of number of services against route length, overlaid with a
# dashed linear-model trend line.
# `formula = y ~ x` is stated explicitly: it is the formula geom_smooth()
# would pick for method = "lm" anyway, but naming it stops ggplot2 from
# emitting its "using formula" message into the rendered dashboard pane.
ggplot(data = bus, mapping = aes(x = Route.Length, y = No.of.Service)) +
  geom_point(color = "blue", alpha = 0.6) +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    color = "red",
    linetype = "dashed"
  ) +
  labs(
    title = "Scatter Plot of Route Length vs No. of Services",
    x = "Route Length (km)",
    y = "Number of Services"
  ) +
  theme_minimal()
```
### Scatter Plot for Departure Time vs Route Length
```{r}
# Route length plotted against departure timing, one point per row.
# NOTE(review): Departure.Timings is used exactly as read.csv() returned it;
# if that column is text, the x axis will be categorical rather than a time
# scale -- confirm against the data.
ggplot(data = bus, mapping = aes(x = Departure.Timings, y = Route.Length)) +
  geom_point(
    color = "green",
    alpha = 0.6
  ) +
  theme_minimal() +
  labs(
    title = "Scatter Plot of Departure Time vs Route Length",
    x = "Departure Timings",
    y = "Route Length (km)"
  )
```
### Scatter Plot for Departure Time vs Route Length (with color by Type)
```{r}
# Route length plotted against departure timing, with points coloured by
# service Type.
# NOTE(review): Departure.Timings is used exactly as read.csv() returned it;
# if that column is text, the x axis will be categorical rather than a time
# scale -- confirm against the data.
ggplot(
  data = bus,
  mapping = aes(x = Departure.Timings, y = Route.Length, color = Type)
) +
  geom_point(alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Scatter Plot of Departure Timings vs Route Length by Type",
    x = "Departure Timings",
    y = "Route Length (km)"
  )
```