library(ggplot2)
# Take a working copy of the built-in airquality data set.
data <- datasets::airquality
# Show every column's type together with its first few values.
utils::str(data)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : int 5 5 5 5 5 5 5 5 5 5 ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
# Per-column summary statistics, including the number of missing values.
summary(object = data)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
##
#bar graph
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Mean wind speed for each month.
# Rows without a Wind reading are discarded before grouping, so each monthly
# mean is computed from observed values only.
clean_data <- airquality %>%
  drop_na(Wind) %>%
  group_by(Month) %>%
  summarise(avg_wind = mean(Wind, na.rm = TRUE))
# Bar graph: one bar per month, bar height = mean wind speed from clean_data.
# NOTE(review): Month is numeric, so mapping it to `fill` shades the bars on a
# continuous gradient; confirm that is intended rather than factor(Month).
ggplot(
  data = clean_data,
  mapping = aes(x = factor(Month), y = avg_wind, fill = Month)
) +
  # geom_col() draws bar heights straight from the y values.
  geom_col() +
  labs(
    y = "average of wind",
    x = "month",
    title = "Month of Average Wind"
  ) +
  theme_minimal() +
  # The fill only decorates the bars, so its legend is hidden.
  theme(legend.position = "none")
#histogram
library(ggplot2)
# Create the histogram: distribution of wind speed over all observations,
# counted in bins that are 2 units wide.
ggplot(airquality, mapping = aes(x = Wind)) +
  geom_histogram(
    colour = "black",
    fill = "pink",
    binwidth = 2
  ) +
  labs(
    y = "frequency",
    x = "wind speed",
    title = "Wind Speed Distribution"
  ) +
  theme_minimal()
#boxplot
library(ggplot2)
# Boxplot: spread of the wind readings within each month.
# NOTE(review): Month is numeric, so mapping it to `fill` shades the boxes on a
# continuous gradient; confirm that is intended rather than factor(Month).
ggplot(
  airquality,
  mapping = aes(x = factor(Month), y = Wind, fill = Month)
) +
  geom_boxplot() +
  labs(
    y = "wind",
    x = "month",
    title = "Monthly Wind speed"
  ) +
  theme_minimal() +
  # The fill only decorates the boxes, so its legend is hidden.
  theme(legend.position = "none")
#line graph
library(ggplot2)
# Line graph: mean wind speed of each month, joined by a single line.
# Month sits on a discrete axis, so without an explicit group ggplot2 treats
# every month as its own series and geom_line() only joins the readings inside
# that month, which renders as vertical strokes instead of a trend.
# `group = 1` places all months in one series, and stat_summary() collapses
# each month's readings to their mean before the line is drawn.
ggplot(data = airquality, aes(x = factor(Month), y = Wind, group = 1)) +
  stat_summary(fun = mean, geom = "line", color = "blue") +
  labs(title = "Monthly Wind Speed",
       x = "monthly",
       y = "average wind speed") +
  theme_minimal()
#Facet Plot
library(ggplot2)
# Create the facet plot: ozone reading against wind speed, one panel per month.
# Observations with a missing Ozone value cannot be drawn; ggplot2 drops them
# and reports how many were removed in a warning.
ggplot(data = airquality, aes(x = Wind, y = Ozone)) +
  geom_point() +
  # One scatter panel for each distinct Month value.
  facet_wrap(~ Month) +
  # The y axis is an ozone concentration reading, so it is labelled with its
  # unit rather than as the "ozone layer".
  labs(title = "Monthly Wind Speed per Ozone",
       x = "Wind",
       y = "Ozone (ppb)") +
  theme_minimal()
## Warning: Removed 37 rows containing missing values or values outside the scale range
## (`geom_point()`).