knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Inspect the data: preview the first six rows of airquality
head(airquality, n = 6L)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# Per-column summary statistics (the output also reports NA counts)
summary(object = airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
##
# 1. Scatter plot of ozone concentration against wind speed.
# Ozone is NA for 37 observations (see the summary(airquality) output), so
# na.rm = TRUE drops those rows silently instead of emitting a
# "Removed 37 rows containing missing values" warning on every render.
ggplot(airquality, aes(x = Wind, y = Ozone)) +
  geom_point(color = "steelblue", alpha = 0.7, na.rm = TRUE) +
  labs(
    title = "Wind vs Ozone",
    x = "Wind (mph)",
    y = "Ozone (ppb)"
  ) +
  theme_minimal() +
  # Bold, horizontally centred title
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))
# 2. Box plot: distribution of Temp within each month.
# Month is wrapped in factor() so that every month gets its own box.
ggplot(data = airquality, mapping = aes(factor(Month), Temp)) +
  geom_boxplot(alpha = 0.6, fill = "tomato") +
  ggtitle("Monthly Temperature Distribution") +
  xlab("Month") +
  ylab("Temperature (°F)") +
  theme_light() +
  # Bold, horizontally centred title
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 14))
# 3. Histogram of Temp, grouped into bins that are 5 units wide.
ggplot(data = airquality, mapping = aes(Temp)) +
  geom_histogram(
    binwidth = 5,
    colour = "black",
    fill = "skyblue"
  ) +
  labs(
    y = "Frequency",
    x = "Temperature (°F)",
    title = "Temperature Distribution Histogram"
  ) +
  theme_classic() +
  # Bold, horizontally centred title
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
  )
## 2. 각 그래프를 통해 데이터에서 어떠한 정보를 얻을 수 있는지 간단히 R Markdown을 이용해서 설명해보자.
# NOTE(review): echo = TRUE is already set by the identical call at the top of
# this file, so this repeat looks redundant — confirm before removing.
knitr::opts_chunk$set(echo = TRUE)
# Scatterplot (산점도): shows the negative correlation between wind speed and ozone levels.
# Boxplot: compares temperature distributions across months, showing a general trend of temperature increasing from May to July and decreasing slightly afterwards.
# Histogram: shows how frequently different temperature ranges occurred.