---
title: "EDA_PROJECT"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    theme: flatly
    social: menu
    source_code: embed
    navbar:
      - { title: "Dataset Description", href: "#dataset-description" }
      - { title: "Univariate Analysis", href: "#univariate-analysis" }
      - { title: "Bivariate Analysis", href: "#bivariate-analysis" }
      - { title: "Multivariate Analysis", href: "#multivariate-analysis" }
---
```{r}
```
------------------------------------------------------------------------
```{r setup, include=FALSE}
library(flexdashboard)
library(ggplot2)
library(dplyr)
library(tidyr)
library(forecast)
library(zoo)
library(lubridate)
library(tseries)
library(reshape2)
library(ggcorrplot)
library(DT)
```
## Dataset Description {.tabset}
### View of the Dataset
```{r}
# Load the built-in Seatbelts monthly time series and flatten it into a
# data frame with one row per month.
data("Seatbelts")
seatbelts_df <- as.data.frame(Seatbelts)

# The ts index carries the calendar position of every observation.
time_index <- time(Seatbelts)

# Store Year as a plain integer vector instead of a ts object. The index is
# fractional (year + month / 12) and computed in floating point, so the ts
# tolerance is added before flooring to keep a January value from landing
# just below its integer year.
seatbelts_df$Year <- as.integer(
  floor(as.numeric(time_index) + getOption("ts.eps", 1e-5))
)
seatbelts_df$Month <- month.abb[cycle(time_index)]

# Reorder columns so that Year and Month come first.
seatbelts_df <- seatbelts_df[
  ,
  c("Year", "Month", setdiff(names(seatbelts_df), c("Year", "Month")))
]

# Interactive table with export buttons. DataTables option names are
# case-sensitive: the button list must be passed as `buttons` (lowercase).
datatable(
  seatbelts_df,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "print", "pdf")
  )
)
```
### About the dataset
```{r}
# Print the compact structure of the prepared data frame.
utils::str(seatbelts_df)
```
### Summary of the Dataset
```{r}
# Print per-column summary statistics of the prepared data frame.
base::summary(seatbelts_df)
```
### Head of the Dataset
```{r}
# Print the first rows of the prepared data frame.
utils::head(seatbelts_df)
```
## Univariate Analysis {.tabset}
### Histogram for PetrolPrice
```{r}
# Density-scaled histogram of PetrolPrice with a kernel density overlay and
# dashed reference lines at the mean (red) and the median (green).
ggplot(seatbelts_df, aes(x = PetrolPrice)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.01, fill = "lightblue", color = "black", alpha = 0.6
  ) +
  geom_density(alpha = 0.2) +
  # The statistics are constants, so they are passed as parameters; mapping
  # them through aes() would draw one identical line per data row.
  geom_vline(
    xintercept = mean(seatbelts_df$PetrolPrice),
    color = "red", linetype = "dashed", lwd = 1
  ) +
  geom_vline(
    xintercept = median(seatbelts_df$PetrolPrice),
    color = "green", linetype = "dashed", lwd = 1
  ) +
  scale_x_continuous(
    breaks = seq(
      min(seatbelts_df$PetrolPrice), max(seatbelts_df$PetrolPrice),
      by = 0.01
    )
  ) +
  labs(title = "Histogram of PetrolPrice", x = "PetrolPrice") +
  theme_minimal()
```
### Histogram for DriversKilled
```{r}
# Density-scaled histogram of DriversKilled with a kernel density overlay and
# dashed reference lines at the mean (red) and the median (green).
ggplot(seatbelts_df, aes(x = DriversKilled)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 10, fill = "violet", color = "black", alpha = 0.6
  ) +
  geom_density(alpha = 0.2) +
  # Constant statistics are passed as parameters so each line is drawn once.
  geom_vline(
    xintercept = mean(seatbelts_df$DriversKilled),
    color = "red", linetype = "dashed", lwd = 1
  ) +
  geom_vline(
    xintercept = median(seatbelts_df$DriversKilled),
    color = "green", linetype = "dashed", lwd = 1
  ) +
  scale_x_continuous(
    breaks = seq(
      min(seatbelts_df$DriversKilled), max(seatbelts_df$DriversKilled),
      by = 50
    )
  ) +
  labs(title = "Histogram of DriversKilled", x = "DriversKilled") +
  theme_minimal()
```
### Histogram for drivers
```{r}
# Density-scaled histogram of drivers with a kernel density overlay and
# dashed reference lines at the mean (red) and the median (green).
ggplot(seatbelts_df, aes(x = drivers)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 10, fill = "purple", color = "black", alpha = 0.6
  ) +
  # The stray trailing comma (an empty argument) has been removed here.
  geom_density(alpha = 0.2) +
  # Constant statistics are passed as parameters so each line is drawn once.
  geom_vline(
    xintercept = mean(seatbelts_df$drivers),
    color = "red", linetype = "dashed", lwd = 1
  ) +
  geom_vline(
    xintercept = median(seatbelts_df$drivers),
    color = "green", linetype = "dashed", lwd = 1
  ) +
  scale_x_continuous(
    breaks = seq(
      min(seatbelts_df$drivers), max(seatbelts_df$drivers),
      by = 50
    )
  ) +
  labs(title = "Histogram of Drivers", x = "Drivers") +
  theme_minimal()
```
### Histogram for front
```{r}
# Density-scaled histogram of front with a kernel density overlay and
# dashed reference lines at the mean (red) and the median (green).
ggplot(seatbelts_df, aes(x = front)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 50, fill = "orange", color = "black", alpha = 0.6
  ) +
  geom_density(alpha = 0.2) +
  # Constant statistics are passed as parameters so each line is drawn once.
  geom_vline(
    xintercept = mean(seatbelts_df$front),
    color = "red", linetype = "dashed", lwd = 1
  ) +
  geom_vline(
    xintercept = median(seatbelts_df$front),
    color = "green", linetype = "dashed", lwd = 1
  ) +
  scale_x_continuous(
    breaks = seq(
      min(seatbelts_df$front), max(seatbelts_df$front),
      by = 50
    )
  ) +
  labs(title = "Histogram of Front", x = "Front") +
  theme_minimal()
```
### Histogram for Rear
```{r}
# Density-scaled histogram of rear with a kernel density overlay and
# dashed reference lines at the mean (red) and the median (green).
ggplot(seatbelts_df, aes(x = rear)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 50, fill = "cyan", color = "black", alpha = 0.6
  ) +
  geom_density(alpha = 0.2) +
  # Constant statistics are passed as parameters so each line is drawn once.
  geom_vline(
    xintercept = mean(seatbelts_df$rear),
    color = "red", linetype = "dashed", lwd = 1
  ) +
  geom_vline(
    xintercept = median(seatbelts_df$rear),
    color = "green", linetype = "dashed", lwd = 1
  ) +
  scale_x_continuous(
    breaks = seq(
      min(seatbelts_df$rear), max(seatbelts_df$rear),
      by = 50
    )
  ) +
  labs(title = "Histogram of Rear", x = "Rear") +
  theme_minimal()
```
### Histogram for Kms
```{r}
# Density-scaled histogram of kms with a kernel density overlay and
# dashed reference lines at the mean (red) and the median (green).
ggplot(seatbelts_df, aes(x = kms)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1000, fill = "pink", color = "black", alpha = 0.6
  ) +
  geom_density(alpha = 0.2) +
  # Constant statistics are passed as parameters so each line is drawn once.
  geom_vline(
    xintercept = mean(seatbelts_df$kms),
    color = "red", linetype = "dashed", lwd = 1
  ) +
  geom_vline(
    xintercept = median(seatbelts_df$kms),
    color = "green", linetype = "dashed", lwd = 1
  ) +
  scale_x_continuous(
    breaks = seq(
      min(seatbelts_df$kms), max(seatbelts_df$kms),
      by = 1000
    )
  ) +
  labs(title = "Histogram of Kms", x = "Kms") +
  theme_minimal()
```
### Histogram for VanKilled
```{r}
# Density-scaled histogram of VanKilled with a kernel density overlay and
# dashed reference lines at the mean (red) and the median (dark green).
ggplot(seatbelts_df, aes(x = VanKilled)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1, fill = "green", color = "black", alpha = 0.6
  ) +
  geom_density(alpha = 0.2) +
  # Constant statistics are passed as parameters so each line is drawn once.
  geom_vline(
    xintercept = mean(seatbelts_df$VanKilled),
    color = "red", linetype = "dashed", lwd = 1
  ) +
  # A darker shade keeps the median line visible against the green bars.
  geom_vline(
    xintercept = median(seatbelts_df$VanKilled),
    color = "darkgreen", linetype = "dashed", lwd = 1
  ) +
  # Break step matches the bin width; a step of 50 exceeds the whole range of
  # this variable and would leave a single tick on the axis.
  scale_x_continuous(
    breaks = seq(
      min(seatbelts_df$VanKilled), max(seatbelts_df$VanKilled),
      by = 1
    )
  ) +
  labs(title = "Histogram of VanKilled", x = "VanKilled") +
  theme_minimal()
```
## Univariate Analysis: Boxplots {.tabset}
### Boxplot of PetrolPrice
```{r}
# Single-group boxplot of PetrolPrice. The empty-string x value is only a
# placeholder, so every x-axis decoration is blanked out.
ggplot(data = seatbelts_df, mapping = aes(x = "", y = PetrolPrice)) +
  geom_boxplot(colour = "black", fill = "lightblue") +
  ggtitle("Boxplot of PetrolPrice") +
  ylab("PetrolPrice") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
```
### Boxplot for DriversKilled
```{r}
# Single-group boxplot of DriversKilled. The empty-string x value is only a
# placeholder, so every x-axis decoration is blanked out.
ggplot(data = seatbelts_df, mapping = aes(x = "", y = DriversKilled)) +
  geom_boxplot(colour = "black", fill = "violet") +
  ggtitle("Boxplot of DriversKilled") +
  ylab("DriversKilled") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
```
### Boxplot of the drivers
```{r}
# Single-group boxplot of drivers. The empty-string x value is only a
# placeholder, so every x-axis decoration is blanked out.
ggplot(data = seatbelts_df, mapping = aes(x = "", y = drivers)) +
  geom_boxplot(colour = "black", fill = "purple") +
  ggtitle("Boxplot of Drivers") +
  ylab("Drivers") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
```
### boxplot of the front
```{r}
# Single-group boxplot of front. The empty-string x value is only a
# placeholder, so every x-axis decoration is blanked out.
ggplot(data = seatbelts_df, mapping = aes(x = "", y = front)) +
  geom_boxplot(colour = "black", fill = "orange") +
  ggtitle("Boxplot of Front") +
  ylab("Front") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
```
### boxplot of the rear
```{r}
# Single-group boxplot of rear. The empty-string x value is only a
# placeholder, so every x-axis decoration is blanked out.
ggplot(data = seatbelts_df, mapping = aes(x = "", y = rear)) +
  geom_boxplot(colour = "black", fill = "cyan") +
  ggtitle("Boxplot of Rear") +
  ylab("Rear") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
```
### boxplot of the kms
```{r}
# Single-group boxplot of kms. The empty-string x value is only a
# placeholder, so every x-axis decoration is blanked out.
ggplot(data = seatbelts_df, mapping = aes(x = "", y = kms)) +
  geom_boxplot(colour = "black", fill = "pink") +
  ggtitle("Boxplot of Kms") +
  ylab("Kms") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
```
### boxplot of vankilled
```{r}
# Single-group boxplot of VanKilled. The empty-string x value is only a
# placeholder, so every x-axis decoration is blanked out.
ggplot(data = seatbelts_df, mapping = aes(x = "", y = VanKilled)) +
  geom_boxplot(colour = "black", fill = "green") +
  ggtitle("Boxplot of VanKilled") +
  ylab("VanKilled") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
```
## Bivariate Analysis {.tabset}
### Scatter plot of DriversKilled vs. front
```{r}
# Scatter plot of DriversKilled (y) against front (x) with a straight
# least-squares trend line and no confidence band.
ggplot(seatbelts_df, aes(x = front, y = DriversKilled)) +
  geom_point(color = "red") +
  # Stating the formula explicitly keeps geom_smooth() from printing its
  # "using formula" message into the rendered dashboard panel.
  geom_smooth(method = "lm", formula = y ~ x, color = "black", se = FALSE) +
  labs(
    title = "Scatter Plot of DriversKilled vs. Front",
    x = "Front", y = "DriversKilled"
  ) +
  theme_minimal()
```
### Scatter plot of DriversKilled vs. rear
```{r}
# Scatter plot of DriversKilled (y) against rear (x) with a straight
# least-squares trend line and no confidence band.
ggplot(seatbelts_df, aes(x = rear, y = DriversKilled)) +
  geom_point(color = "green") +
  # Stating the formula explicitly keeps geom_smooth() from printing its
  # "using formula" message into the rendered dashboard panel.
  geom_smooth(method = "lm", formula = y ~ x, color = "black", se = FALSE) +
  labs(
    title = "Scatter Plot of DriversKilled vs. Rear",
    x = "Rear", y = "DriversKilled"
  ) +
  theme_minimal()
```
### Scatter plot of DriversKilled vs. kms
```{r}
# Scatter plot of DriversKilled (y) against kms (x) with a straight
# least-squares trend line and no confidence band.
ggplot(seatbelts_df, aes(x = kms, y = DriversKilled)) +
  geom_point(color = "purple") +
  # Stating the formula explicitly keeps geom_smooth() from printing its
  # "using formula" message into the rendered dashboard panel.
  geom_smooth(method = "lm", formula = y ~ x, color = "black", se = FALSE) +
  labs(
    title = "Scatter Plot of DriversKilled vs. Kms",
    x = "Kms", y = "DriversKilled"
  ) +
  theme_minimal()
```
### Scatter plot of DriversKilled vs. VanKilled
```{r}
# Scatter plot of DriversKilled (y) against VanKilled (x) with a straight
# least-squares trend line and no confidence band.
ggplot(seatbelts_df, aes(x = VanKilled, y = DriversKilled)) +
  geom_point(color = "orange") +
  # Stating the formula explicitly keeps geom_smooth() from printing its
  # "using formula" message into the rendered dashboard panel.
  geom_smooth(method = "lm", formula = y ~ x, color = "black", se = FALSE) +
  labs(
    title = "Scatter Plot of DriversKilled vs. VanKilled",
    x = "VanKilled", y = "DriversKilled"
  ) +
  theme_minimal()
```
### Scatter plot of PetrolPrice vs. front
```{r}
# Scatter plot of front (y) against PetrolPrice (x) with a straight
# least-squares trend line and no confidence band.
ggplot(seatbelts_df, aes(x = PetrolPrice, y = front)) +
  geom_point(color = "brown") +
  # Stating the formula explicitly keeps geom_smooth() from printing its
  # "using formula" message into the rendered dashboard panel.
  geom_smooth(method = "lm", formula = y ~ x, color = "black", se = FALSE) +
  labs(
    title = "Scatter Plot of PetrolPrice vs. Front",
    x = "PetrolPrice", y = "Front"
  ) +
  theme_minimal()
```
### Scatter plot of front vs. rear
```{r}
# Scatter plot of rear (y) against front (x) with a straight
# least-squares trend line and no confidence band.
ggplot(seatbelts_df, aes(x = front, y = rear)) +
  geom_point(color = "magenta") +
  # Stating the formula explicitly keeps geom_smooth() from printing its
  # "using formula" message into the rendered dashboard panel.
  geom_smooth(method = "lm", formula = y ~ x, color = "black", se = FALSE) +
  labs(
    title = "Scatter Plot of Front vs. Rear",
    x = "Front", y = "Rear"
  ) +
  theme_minimal()
```
## Multivariate Analysis {.tabset}
### Correlation Heatmap
```{r}
# Correlation heatmap across every numeric column of seatbelts_df.

# Keep only numeric columns. vapply() is used instead of sapply() so the
# column mask is always a logical vector, whatever the input looks like.
numeric_data <- seatbelts_df[vapply(seatbelts_df, is.numeric, logical(1))]

# Pairwise correlations computed on rows with no missing values.
correlation_matrix <- cor(numeric_data, use = "complete.obs")

# Draw the matrix with the coefficients printed inside each cell.
ggcorrplot(
  correlation_matrix,
  lab = TRUE, # show correlation coefficients
  lab_size = 3, # coefficient label size
  title = "Correlation Heatmap",
  colors = c("blue", "white", "red"), # low / mid / high gradient
  outline.color = "black" # cell border color
)
```