library(ggplot2)

# Keep a working copy of the built-in airquality data set for inspection.
data <- airquality

# Column-level overview: type and leading values of every variable.
str(object = data)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...
# Per-column summary statistics (quartiles, mean, NA counts).
summary(object = data)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##      Month            Day      
##  Min.   :5.000   Min.   : 1.0  
##  1st Qu.:6.000   1st Qu.: 8.0  
##  Median :7.000   Median :16.0  
##  Mean   :6.993   Mean   :15.8  
##  3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :9.000   Max.   :31.0  
## 

#bar graph

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)

# Mean wind speed for each month.
# Rows with a missing Wind reading are dropped up front, so mean() needs no
# additional na.rm handling.
clean_data <- airquality %>%
  filter(!is.na(Wind)) %>%
  group_by(Month) %>%
  summarize(avg_wind = mean(Wind))

# Bar graph: one bar per month, bar height = that month's mean wind speed.
# geom_col() plots the y values as given (equivalent to
# geom_bar(stat = "identity")).
# Month is an integer column, so it is wrapped in factor() for both x and
# fill; this keeps it categorical (discrete colours) instead of placing it
# on a continuous colour gradient.
ggplot(
  clean_data,
  aes(x = factor(Month), y = avg_wind, fill = factor(Month))
) +
  geom_col() +
  labs(title = "Month of Average Wind",
       x = "month",
       y = "average of wind") +
  theme_minimal() +
  # The fill only mirrors the x axis, so a legend would be redundant.
  theme(legend.position = "none")

#histogram

library(ggplot2)

# Create the histogram: distribution of the Wind column in bins of width 2,
# drawn as pink bars with black outlines.
ggplot(airquality) +
  geom_histogram(
    mapping = aes(x = Wind),
    binwidth = 2,
    colour = "black",
    fill = "pink"
  ) +
  theme_minimal() +
  labs(
    y = "frequency",
    x = "wind speed",
    title = "Wind Speed Distribution"
  )

#boxplot

library(ggplot2)

# Boxplot: distribution of daily wind speed within each month.
# Month is an integer column, so it is wrapped in factor() for both x and
# fill; this keeps it categorical (discrete fill colours) instead of placing
# it on a continuous colour gradient.
ggplot(
  data = airquality,
  aes(x = factor(Month), y = Wind, fill = factor(Month))
) +
  geom_boxplot() +
  labs(title = "Monthly Wind speed",
       x = "month",
       y = "wind") +
  theme_minimal() +
  # The fill only mirrors the x axis, so a legend would be redundant.
  theme(legend.position = "none")

#line graph

library(ggplot2)

# Line graph: mean wind speed per month.
# With a discrete x axis, geom_line() groups the rows by month, so plotting
# the raw daily readings draws one vertical segment per month rather than a
# trend across months. Summarising each month to its mean and forcing a
# single group (group = 1) gives one value per month joined by one line.
ggplot(data = airquality, aes(x = factor(Month), y = Wind, group = 1)) +
  stat_summary(fun = mean, geom = "line", color = "blue") +
  labs(title = "Monthly Wind Speed",
       x = "monthly",
       y = "average wind speed") +
  theme_minimal()

#Facet Plot

library(ggplot2)

# Facet plot: ozone against wind speed, one panel per month.
# Ozone is missing for 37 of the 153 rows; na.rm = TRUE drops those rows
# deliberately instead of relying on ggplot2's "Removed 37 rows" warning.
# The Ozone column is a concentration in parts per billion (per the
# airquality data set documentation), hence the y-axis label.
ggplot(data = airquality, aes(x = Wind, y = Ozone)) +
  geom_point(na.rm = TRUE) +
  facet_wrap(~ Month) +
  labs(title = "Monthly Wind Speed per Ozone",
       x = "Wind",
       y = "Ozone (ppb)") +
  theme_minimal()