---
title: "Air Quality Dashboard"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    theme: cosmo
    social: menu
    source_code: embed
---
```{r setup, include=FALSE}
library(flexdashboard)
library(dplyr)
library(ggplot2)
library(DT)
```
## Dataset Description {.tabset}
### Structure of dataset
```{r}
# Read the measurements from the CSV file (path is relative).
air_quality <- utils::read.csv(file = "Air_Quality.csv")
# Show each column's name, type and first few values.
utils::str(object = air_quality)
```
### Summary
```{r}
# Descriptive statistics for every column. This is evaluated before the
# cleaning step below, so the printed output describes the data as loaded.
summary(air_quality)
# Cleaning, done in one pass:
#   1. in each pollutant measurement column, fill missing entries with that
#      column's median (computed with the missing entries ignored);
#   2. drop rows that exactly duplicate another row.
air_quality <- air_quality %>%
  mutate(across(
    c(pollutant_min, pollutant_max, pollutant_avg),
    function(readings) {
      replace(readings, is.na(readings), median(readings, na.rm = TRUE))
    }
  )) %>%
  distinct()
```
## Dataset visualization {.tabset}
### Bar plot to compare average pollutant levels by state
```{r}
# One row per state holding the mean of pollutant_avg across its records.
state_pollution_avg <- summarise(
  group_by(air_quality, state),
  mean_pollutant_avg = mean(pollutant_avg, na.rm = TRUE)
)
# Horizontal bars, one per state, ordered by the state's mean level.
ggplot(
  data = state_pollution_avg,
  mapping = aes(
    x = reorder(state, mean_pollutant_avg),
    y = mean_pollutant_avg
  )
) +
  geom_col(fill = "lightblue") +
  # Flipped so the state names run down the vertical axis.
  coord_flip() +
  labs(
    title = "Average Pollutant Levels by State",
    x = "State",
    y = "Average Pollutant Levels"
  ) +
  theme_minimal()
```
### Boxplot of pollutant_avg for each state
```{r}
# One box per state showing the spread of pollutant_avg.
ggplot(air_quality, aes(x = state, y = pollutant_avg)) +
  geom_boxplot(fill = "lightgreen") +
  # Flipping puts the state names on the vertical axis, where they read
  # horizontally without any rotation.
  coord_flip() +
  labs(
    title = "Boxplot of Pollutant Levels by State",
    x = "State",
    y = "Pollutant Average"
  ) +
  # No axis.text.x rotation here: after coord_flip() the horizontal axis
  # carries the numeric pollutant values, so rotating its labels would only
  # turn the numbers sideways. theme_minimal() matches the other
  # state-level plot in this document.
  theme_minimal()
```
### Boxplot of pollutant_avg by pollutant type
```{r}
# One box per pollutant type showing the spread of pollutant_avg, drawn
# horizontally.
air_quality %>%
  ggplot(mapping = aes(x = pollutant_id, y = pollutant_avg)) +
  geom_boxplot(fill = "lightgreen") +
  coord_flip() +
  labs(
    title = "Boxplot of Pollutant Levels by Pollutant Type",
    x = "Pollutant",
    y = "Pollutant Level"
  ) +
  theme_minimal()
```
## Histogram visualization {.tabset}
### Histogram of PM2.5 pollutant average values
```{r}
# Keep only the PM2.5 records; the later PM2.5 chunks reuse this subset.
subset_PM25 <- filter(air_quality, pollutant_id == "PM2.5")
# Distribution of the PM2.5 average readings in bins 10 units wide.
ggplot(data = subset_PM25, mapping = aes(x = pollutant_avg)) +
  geom_histogram(binwidth = 10, fill = "lightgreen", colour = "black") +
  labs(
    title = "Histogram of PM2.5 Pollutant Averages",
    x = "Pollutant Average",
    y = "Frequency"
  )
```
### Histogram of PM2.5 pollutant minimum values
```{r}
# Distribution of the PM2.5 minimum readings in bins 10 units wide.
ggplot(data = subset_PM25, mapping = aes(x = pollutant_min)) +
  geom_histogram(binwidth = 10, fill = "lightcoral", colour = "black") +
  labs(
    title = "Histogram of PM2.5 Pollutant Minimum Values",
    x = "Pollutant Min",
    y = "Frequency"
  )
```
### Histogram of PM2.5 pollutant maximum values
```{r}
# Distribution of the PM2.5 maximum readings in bins 10 units wide.
ggplot(data = subset_PM25, mapping = aes(x = pollutant_max)) +
  geom_histogram(binwidth = 10, fill = "lightblue", colour = "black") +
  labs(
    title = "Histogram of PM2.5 Pollutant Maximum Values",
    x = "Pollutant Max",
    y = "Frequency"
  )
```
## Boxplot visualization {.tabset}
### Boxplot of PM2.5 pollutant average values
```{r}
# Single box summarising all PM2.5 average readings.
ggplot(data = subset_PM25, mapping = aes(y = pollutant_avg)) +
  geom_boxplot(fill = "lightblue") +
  labs(
    title = "Boxplot of PM2.5 Pollutant Averages",
    y = "Pollutant Average"
  )
```
### Boxplot of PM2.5 pollutant minimum values
```{r}
# Single box summarising all PM2.5 minimum readings.
ggplot(data = subset_PM25, mapping = aes(y = pollutant_min)) +
  geom_boxplot(fill = "lightpink") +
  labs(
    title = "Boxplot of PM2.5 Pollutant Minimum Values",
    y = "Pollutant Min"
  )
```
### Boxplot of PM2.5 pollutant maximum values
```{r}
# Single box summarising all PM2.5 maximum readings.
ggplot(data = subset_PM25, mapping = aes(y = pollutant_max)) +
  geom_boxplot(fill = "lightyellow") +
  labs(
    title = "Boxplot of PM2.5 Pollutant Maximum Values",
    y = "Pollutant Max"
  )
```
## Scatterplot visualization {.tabset}
### Scatter plot of pollutant_min vs pollutant_max for PM2.5
```{r}
# PM2.5 records: minimum reading on the x axis against maximum on the y axis.
subset_PM25 %>%
  ggplot(mapping = aes(x = pollutant_min, y = pollutant_max)) +
  geom_point(colour = "blue") +
  labs(
    title = "Scatter Plot of PM2.5 Min vs Max",
    x = "Pollutant Min",
    y = "Pollutant Max"
  )
```
### Scatter plot of pollutant_avg vs pollutant_min for PM2.5
```{r}
# PM2.5 records: minimum reading on the x axis against average on the y axis.
subset_PM25 %>%
  ggplot(mapping = aes(x = pollutant_min, y = pollutant_avg)) +
  geom_point(colour = "darkgreen") +
  labs(
    title = "Scatter Plot of PM2.5 Avg vs Min",
    x = "Pollutant Min",
    y = "Pollutant Avg"
  )
```
### Scatter plot of pollutant_avg vs pollutant_max for PM2.5
```{r}
# PM2.5 records: maximum reading on the x axis against average on the y axis.
subset_PM25 %>%
  ggplot(mapping = aes(x = pollutant_max, y = pollutant_avg)) +
  geom_point(colour = "orange") +
  labs(
    title = "Scatter Plot of PM2.5 Avg vs Max",
    x = "Pollutant Max",
    y = "Pollutant Avg"
  )
```
## Linechart visualization {.tabset}
### Line plot of PM2.5 pollutant averages over time
```{r}
# Average the PM2.5 readings that share a last_update value so the series has
# exactly one value per timestamp.
pm25_by_update <- subset_PM25 %>%
  group_by(last_update) %>%
  summarise(pollutant_avg = mean(pollutant_avg, na.rm = TRUE))
# last_update is not converted to a date-time in the visible chunks, so it is
# presumably still text as read by read.csv() and ggplot2 treats it as a
# discrete axis. On a discrete axis geom_line() makes one group per x value and
# never joins points across timestamps; group = 1 puts every point in a single
# group so one line is drawn.
# NOTE(review): the axis order is the sort order of the raw last_update values,
# which may not be chronological - parse the column to a date-time once its
# format is confirmed.
ggplot(pm25_by_update, aes(x = last_update, y = pollutant_avg, group = 1)) +
  geom_line(color = "purple") +
  # Points keep the series visible even when there is only one timestamp, in
  # which case geom_line() has nothing to connect.
  geom_point(color = "purple") +
  labs(
    title = "PM2.5 Average Over Time",
    x = "Date",
    y = "Pollutant Average"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```