library(ggplot2)
# Load the diamonds dataset (available once ggplot2 is attached) into the
# workspace and print a per-column summary of it.
data(list = "diamonds")
summary(object = diamonds)
## carat cut color clarity depth
## Min. :0.2000 Fair : 1610 D: 6775 SI1 :13065 Min. :43.00
## 1st Qu.:0.4000 Good : 4906 E: 9797 VS2 :12258 1st Qu.:61.00
## Median :0.7000 Very Good:12082 F: 9542 SI2 : 9194 Median :61.80
## Mean :0.7979 Premium :13791 G:11292 VS1 : 8171 Mean :61.75
## 3rd Qu.:1.0400 Ideal :21551 H: 8304 VVS2 : 5066 3rd Qu.:62.50
## Max. :5.0100 I: 5422 VVS1 : 3655 Max. :79.00
## J: 2808 (Other): 2531
## table price x y
## Min. :43.00 Min. : 326 Min. : 0.000 Min. : 0.000
## 1st Qu.:56.00 1st Qu.: 950 1st Qu.: 4.710 1st Qu.: 4.720
## Median :57.00 Median : 2401 Median : 5.700 Median : 5.710
## Mean :57.46 Mean : 3933 Mean : 5.731 Mean : 5.735
## 3rd Qu.:59.00 3rd Qu.: 5324 3rd Qu.: 6.540 3rd Qu.: 6.540
## Max. :95.00 Max. :18823 Max. :10.740 Max. :58.900
##
## z
## Min. : 0.000
## 1st Qu.: 2.910
## Median : 3.530
## Mean : 3.539
## 3rd Qu.: 4.040
## Max. :31.800
##
# Histogram of diamond prices, binned in steps of 500.
ggplot(data = diamonds, mapping = aes(x = price)) +
  geom_histogram(
    binwidth = 500,
    colour = "black",
    fill = "skyblue"
  ) +
  labs(
    x = "가격",
    y = "빈도",
    title = "다이아몬드 가격 분포"
  )

# Study notes on the histogram result:
# - Likely exam question: the size ordering mean > median > mode.
# - X-axis bins: if the bins are divided too finely, computation speed
#   is affected.
# Scatter plot of price against carat, drawn with semi-transparent points.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  labs(x = "Carat", y = "Price", title = "캐럿과 가격 관계") +
  geom_point(alpha = 0.2)

# A plot relating two numeric variables is called a scatter plot.
# It shows that price rises as carat increases.
# Carat-vs-price scatter with points coloured by clarity, minimal theme.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, colour = clarity)
) +
  theme_minimal() +
  geom_point(alpha = 0.6) +
  labs(x = "Carat", y = "Price", title = "캐럿과 가격 관계")

# Box plots of price, one box per cut level.
ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
  labs(x = "Cut", y = "Price", title = "컷 등급별 가격 분포") +
  geom_boxplot(fill = "lightpink")

# Carat-vs-price scatter drawn in a separate panel for each cut level.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(alpha = 0.3) +
  labs(title = "컷 등급별 캐럿-가격 관계") +
  facet_wrap(facets = ~cut)

# One-time setup, if the package is not installed yet:
# install.packages("gapminder")
library(gapminder)

# Load the gapminder dataset into the workspace and print a per-column
# summary of it.
data(list = "gapminder")
summary(object = gapminder)
## country continent year lifeExp
## Afghanistan: 12 Africa :624 Min. :1952 Min. :23.60
## Albania : 12 Americas:300 1st Qu.:1966 1st Qu.:48.20
## Algeria : 12 Asia :396 Median :1980 Median :60.71
## Angola : 12 Europe :360 Mean :1980 Mean :59.47
## Argentina : 12 Oceania : 24 3rd Qu.:1993 3rd Qu.:70.85
## Australia : 12 Max. :2007 Max. :82.60
## (Other) :1632
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
##
# Optional number-formatting switches (left disabled):
# options(scipen = 999)   # scientific notation -> plain numbers
# options(scipen = -999)  # plain numbers -> scientific notation

# GDP per capita (log10 x-axis) against life expectancy, one panel per year;
# point size is mapped to population and colour to continent.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, size = pop, colour = continent)
) +
  geom_point(alpha = 0.6) +
  scale_x_log10() +
  facet_wrap(facets = ~year) +
  theme_light() +
  labs(
    x = "1인당 GDP (log)",
    y = "기대수명",
    title = "연도별 GDP와 기대수명의 관계"
  )

# One-time setup for the animations below (run manually if needed).
# These are real comments rather than a quoted string, so nothing is
# evaluated or printed when the script runs.
# install.packages("gganimate")
# install.packages("gifski")      # needed when saving as GIF
# install.packages("transformr")  # NOTE(review): the original note said
#                                 # "transformer"; presumably transformr,
#                                 # the gganimate companion - confirm
library(gganimate)
library(gapminder)
# Animated bubble chart: GDP per capita (log10 x-axis) against life
# expectancy, with point size mapped to population and colour to continent.
# Frames advance over `year`; the title interpolates the frame's time.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, size = pop, colour = continent)
) +
  geom_point(show.legend = TRUE, alpha = 0.7) +
  scale_x_log10() +
  theme_minimal() +
  labs(
    x = "1인당 GDP (log)",
    y = "기대수명",
    title = "Year: {frame_time}"
  ) +
  transition_time(time = year) +
  ease_aes("linear")
# Animated bubble chart with country labels: GDP per capita (log10 x-axis)
# against life expectancy, with point size mapped to population and colour
# to continent. Frames advance over `year`; the title interpolates the
# frame's time.
ggplot(
  gapminder,
  aes(
    x = gdpPercap, y = lifeExp,
    size = pop, color = continent, label = country
  )
) +
  geom_point(alpha = 0.7, show.legend = FALSE) +
  # Country names. The constant size = 3 replaces the inherited size = pop
  # mapping for this layer. show.legend = FALSE is required here as well:
  # the text layer inherits color = continent and would otherwise still draw
  # a colour legend (with text-glyph keys) although the point layer's legend
  # is switched off.
  geom_text(
    size = 3, vjust = -1, check_overlap = TRUE, show.legend = FALSE
  ) +
  scale_x_log10() +
  labs(
    title = "연도별 세계 발전 변화: {frame_time}",
    x = "1인당 GDP (로그 스케일)", y = "기대수명"
  ) +
  theme_minimal() +
  transition_time(year) +
  ease_aes("linear")
