예 : R에 내장되어 있는 cars 라는 데이터의 요약 정보
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
예 : R에 내장되어 있는 pressure 라는 데이터로 그래프 그리기
plot(pressure)
print("welcome")
## [1] "welcome"
1+2+3+4+5
## [1] 15
a <- 1+2+3+4+5
a
## [1] 15
b <- c(1,2,3,4,5)
b
## [1] 1 2 3 4 5
c <- 1:5
c
## [1] 1 2 3 4 5
d <- sum(1:5)
d
## [1] 15
e <- "welcome"
e
## [1] "welcome"
f <- c("welcome", "sir")
f
## [1] "welcome" "sir"
g <- paste("welcome", "sir")
g
## [1] "welcome sir"
remove(a) 또는 rm(a) : 객체 a 삭제
install.packages("패키지 이름") : PC에 패키지 설치, 우하단 "Packages"에서 확인 가능, 1회만 설치하면 됨
library(패키지 이름) : PC에 설치된 패키지를 R에서 사용할 수 있도록 부착하는 것, R을 재구동 할 때마다 실행해야 함
install.packages("ggplot2")
library(ggplot2)
Working Directory 설정
setwd("~/Downloads")
데이터 불러오기
install.packages("haven")
library(haven)
read_sav("data.sav")
install.packages("tidyverse")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
sum(1:10)
## [1] 55
1:10 %>% sum()
## [1] 55
?mpg : 데이터 mpg에 대한 설명 보기
mpg 정리하기
mpg
## # A tibble: 234 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto… f 18 29 p comp…
## 2 audi a4 1.8 1999 4 manu… f 21 29 p comp…
## 3 audi a4 2 2008 4 manu… f 20 31 p comp…
## 4 audi a4 2 2008 4 auto… f 21 30 p comp…
## 5 audi a4 2.8 1999 6 auto… f 16 26 p comp…
## 6 audi a4 2.8 1999 6 manu… f 18 26 p comp…
## 7 audi a4 3.1 2008 6 auto… f 18 27 p comp…
## 8 audi a4 quattro 1.8 1999 4 manu… 4 18 26 p comp…
## 9 audi a4 quattro 1.8 1999 4 auto… 4 16 25 p comp…
## 10 audi a4 quattro 2 2008 4 manu… 4 20 28 p comp…
## # ℹ 224 more rows
summary(mpg)
## manufacturer model displ year
## Length:234 Length:234 Min. :1.600 Min. :1999
## Class :character Class :character 1st Qu.:2.400 1st Qu.:1999
## Mode :character Mode :character Median :3.300 Median :2004
## Mean :3.472 Mean :2004
## 3rd Qu.:4.600 3rd Qu.:2008
## Max. :7.000 Max. :2008
## cyl trans drv cty
## Min. :4.000 Length:234 Length:234 Min. : 9.00
## 1st Qu.:4.000 Class :character Class :character 1st Qu.:14.00
## Median :6.000 Mode :character Mode :character Median :17.00
## Mean :5.889 Mean :16.86
## 3rd Qu.:8.000 3rd Qu.:19.00
## Max. :8.000 Max. :35.00
## hwy fl class
## Min. :12.00 Length:234 Length:234
## 1st Qu.:18.00 Class :character Class :character
## Median :24.00 Mode :character Mode :character
## Mean :23.44
## 3rd Qu.:27.00
## Max. :44.00
str(mpg)
## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
## $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
## $ model : chr [1:234] "a4" "a4" "a4" "a4" ...
## $ displ : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr [1:234] "f" "f" "f" "f" ...
## $ cty : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr [1:234] "p" "p" "p" "p" ...
## $ class : chr [1:234] "compact" "compact" "compact" "compact" ...
head(mpg)
## # A tibble: 6 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compa…
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compa…
## 3 audi a4 2 2008 4 manual(m6) f 20 31 p compa…
## 4 audi a4 2 2008 4 auto(av) f 21 30 p compa…
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compa…
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compa…
tail(mpg)
## # A tibble: 6 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 volkswagen passat 1.8 1999 4 auto(l5) f 18 29 p mids…
## 2 volkswagen passat 2 2008 4 auto(s6) f 19 28 p mids…
## 3 volkswagen passat 2 2008 4 manual(m6) f 21 29 p mids…
## 4 volkswagen passat 2.8 1999 6 auto(l5) f 16 26 p mids…
## 5 volkswagen passat 2.8 1999 6 manual(m5) f 18 26 p mids…
## 6 volkswagen passat 3.6 2008 6 auto(s6) f 17 26 p mids…
dim(mpg)
## [1] 234 11
mpg.audi <- mpg %>% filter(manufacturer == "audi")
mpg.audi
## # A tibble: 18 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto… f 18 29 p comp…
## 2 audi a4 1.8 1999 4 manu… f 21 29 p comp…
## 3 audi a4 2 2008 4 manu… f 20 31 p comp…
## 4 audi a4 2 2008 4 auto… f 21 30 p comp…
## 5 audi a4 2.8 1999 6 auto… f 16 26 p comp…
## 6 audi a4 2.8 1999 6 manu… f 18 26 p comp…
## 7 audi a4 3.1 2008 6 auto… f 18 27 p comp…
## 8 audi a4 quattro 1.8 1999 4 manu… 4 18 26 p comp…
## 9 audi a4 quattro 1.8 1999 4 auto… 4 16 25 p comp…
## 10 audi a4 quattro 2 2008 4 manu… 4 20 28 p comp…
## 11 audi a4 quattro 2 2008 4 auto… 4 19 27 p comp…
## 12 audi a4 quattro 2.8 1999 6 auto… 4 15 25 p comp…
## 13 audi a4 quattro 2.8 1999 6 manu… 4 17 25 p comp…
## 14 audi a4 quattro 3.1 2008 6 auto… 4 17 25 p comp…
## 15 audi a4 quattro 3.1 2008 6 manu… 4 15 25 p comp…
## 16 audi a6 quattro 2.8 1999 6 auto… 4 15 24 p mids…
## 17 audi a6 quattro 3.1 2008 6 auto… 4 17 25 p mids…
## 18 audi a6 quattro 4.2 2008 8 auto… 4 16 23 p mids…
mpg.year <- mpg %>% select(manufacturer, model, year)
mpg.year
## # A tibble: 234 × 3
## manufacturer model year
## <chr> <chr> <int>
## 1 audi a4 1999
## 2 audi a4 1999
## 3 audi a4 2008
## 4 audi a4 2008
## 5 audi a4 1999
## 6 audi a4 1999
## 7 audi a4 2008
## 8 audi a4 quattro 1999
## 9 audi a4 quattro 1999
## 10 audi a4 quattro 2008
## # ℹ 224 more rows
mpg <- mpg %>% arrange(cty)
mpg
## # A tibble: 234 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 dodge dakota pi… 4.7 2008 8 auto… 4 9 12 e pick…
## 2 dodge durango 4… 4.7 2008 8 auto… 4 9 12 e suv
## 3 dodge ram 1500 … 4.7 2008 8 auto… 4 9 12 e pick…
## 4 dodge ram 1500 … 4.7 2008 8 manu… 4 9 12 e pick…
## 5 jeep grand che… 4.7 2008 8 auto… 4 9 12 e suv
## 6 chevrolet c1500 sub… 5.3 2008 8 auto… r 11 15 e suv
## 7 chevrolet k1500 tah… 5.3 2008 8 auto… 4 11 14 e suv
## 8 chevrolet k1500 tah… 5.7 1999 8 auto… 4 11 15 r suv
## 9 dodge caravan 2… 3.3 2008 6 auto… f 11 17 e mini…
## 10 dodge dakota pi… 5.2 1999 8 manu… 4 11 17 r pick…
## # ℹ 224 more rows
# Sort rows by cty from highest to lowest. desc() is dplyr's documented helper
# for descending order and, unlike unary minus, also works on non-numeric columns.
mpg <- mpg %>% arrange(desc(cty))
mpg
## # A tibble: 234 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 volkswagen new beetle 1.9 1999 4 manu… f 35 44 d subc…
## 2 volkswagen jetta 1.9 1999 4 manu… f 33 44 d comp…
## 3 volkswagen new beetle 1.9 1999 4 auto… f 29 41 d subc…
## 4 honda civic 1.6 1999 4 manu… f 28 33 r subc…
## 5 toyota corolla 1.8 2008 4 manu… f 28 37 r comp…
## 6 honda civic 1.8 2008 4 manu… f 26 34 r subc…
## 7 toyota corolla 1.8 1999 4 manu… f 26 35 r comp…
## 8 toyota corolla 1.8 2008 4 auto… f 26 35 r comp…
## 9 honda civic 1.6 1999 4 manu… f 25 32 r subc…
## 10 honda civic 1.8 2008 4 auto… f 25 36 r subc…
## # ℹ 224 more rows
mpg <- mpg %>% arrange(cty, hwy)
mpg
## # A tibble: 234 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 dodge dakota pi… 4.7 2008 8 auto… 4 9 12 e pick…
## 2 dodge durango 4… 4.7 2008 8 auto… 4 9 12 e suv
## 3 dodge ram 1500 … 4.7 2008 8 auto… 4 9 12 e pick…
## 4 dodge ram 1500 … 4.7 2008 8 manu… 4 9 12 e pick…
## 5 jeep grand che… 4.7 2008 8 auto… 4 9 12 e suv
## 6 chevrolet k1500 tah… 5.3 2008 8 auto… 4 11 14 e suv
## 7 jeep grand che… 6.1 2008 8 auto… 4 11 14 p suv
## 8 chevrolet c1500 sub… 5.3 2008 8 auto… r 11 15 e suv
## 9 chevrolet k1500 tah… 5.7 1999 8 auto… 4 11 15 r suv
## 10 dodge dakota pi… 5.2 1999 8 auto… 4 11 15 r pick…
## # ℹ 224 more rows
mpg <- mpg %>% mutate(century = ceiling(year/100))
mpg
## # A tibble: 234 × 12
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 dodge dakota pi… 4.7 2008 8 auto… 4 9 12 e pick…
## 2 dodge durango 4… 4.7 2008 8 auto… 4 9 12 e suv
## 3 dodge ram 1500 … 4.7 2008 8 auto… 4 9 12 e pick…
## 4 dodge ram 1500 … 4.7 2008 8 manu… 4 9 12 e pick…
## 5 jeep grand che… 4.7 2008 8 auto… 4 9 12 e suv
## 6 chevrolet k1500 tah… 5.3 2008 8 auto… 4 11 14 e suv
## 7 jeep grand che… 6.1 2008 8 auto… 4 11 14 p suv
## 8 chevrolet c1500 sub… 5.3 2008 8 auto… r 11 15 e suv
## 9 chevrolet k1500 tah… 5.7 1999 8 auto… 4 11 15 r suv
## 10 dodge dakota pi… 5.2 1999 8 auto… 4 11 15 r pick…
## # ℹ 224 more rows
## # ℹ 1 more variable: century <dbl>
mpg <- mpg %>% mutate(cyl6 = if_else(cyl>=6, "6이상", "6미만"))
mpg
## # A tibble: 234 × 13
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 dodge dakota pi… 4.7 2008 8 auto… 4 9 12 e pick…
## 2 dodge durango 4… 4.7 2008 8 auto… 4 9 12 e suv
## 3 dodge ram 1500 … 4.7 2008 8 auto… 4 9 12 e pick…
## 4 dodge ram 1500 … 4.7 2008 8 manu… 4 9 12 e pick…
## 5 jeep grand che… 4.7 2008 8 auto… 4 9 12 e suv
## 6 chevrolet k1500 tah… 5.3 2008 8 auto… 4 11 14 e suv
## 7 jeep grand che… 6.1 2008 8 auto… 4 11 14 p suv
## 8 chevrolet c1500 sub… 5.3 2008 8 auto… r 11 15 e suv
## 9 chevrolet k1500 tah… 5.7 1999 8 auto… 4 11 15 r suv
## 10 dodge dakota pi… 5.2 1999 8 auto… 4 11 15 r pick…
## # ℹ 224 more rows
## # ℹ 2 more variables: century <dbl>, cyl6 <chr>
# Add a mean.displ column holding the mean of displ within each manufacturer.
# The result remains grouped by manufacturer.
mpg.grouped <- mutate(
  group_by(mpg, manufacturer),
  mean.displ = mean(displ)
)
mpg.grouped
## # A tibble: 234 × 14
## # Groups: manufacturer [15]
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 dodge dakota pi… 4.7 2008 8 auto… 4 9 12 e pick…
## 2 dodge durango 4… 4.7 2008 8 auto… 4 9 12 e suv
## 3 dodge ram 1500 … 4.7 2008 8 auto… 4 9 12 e pick…
## 4 dodge ram 1500 … 4.7 2008 8 manu… 4 9 12 e pick…
## 5 jeep grand che… 4.7 2008 8 auto… 4 9 12 e suv
## 6 chevrolet k1500 tah… 5.3 2008 8 auto… 4 11 14 e suv
## 7 jeep grand che… 6.1 2008 8 auto… 4 11 14 p suv
## 8 chevrolet c1500 sub… 5.3 2008 8 auto… r 11 15 e suv
## 9 chevrolet k1500 tah… 5.7 1999 8 auto… 4 11 15 r suv
## 10 dodge dakota pi… 5.2 1999 8 auto… 4 11 15 r pick…
## # ℹ 224 more rows
## # ℹ 3 more variables: century <dbl>, cyl6 <chr>, mean.displ <dbl>
# Collapse to one row per manufacturer: keep only the key and its mean
# displacement, then drop the repeated rows (first occurrence is kept).
mpg.grouped <- mpg.grouped %>%
  select(manufacturer, mean.displ) %>%
  distinct()
mpg.grouped
## # A tibble: 15 × 2
## # Groups: manufacturer [15]
## manufacturer mean.displ
## <chr> <dbl>
## 1 dodge 4.38
## 2 jeep 4.58
## 3 chevrolet 5.06
## 4 ford 4.54
## 5 land rover 4.3
## 6 toyota 2.95
## 7 lincoln 5.4
## 8 nissan 3.27
## 9 mercury 4.4
## 10 audi 2.54
## 11 volkswagen 2.26
## 12 hyundai 2.43
## 13 pontiac 3.96
## 14 subaru 2.46
## 15 honda 1.71
# Left-join the per-manufacturer means back onto mpg: all.x = TRUE keeps every
# row of mpg. TRUE is spelled out because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved constant.
mpg.merge <- merge(mpg, mpg.grouped, by = "manufacturer", all.x = TRUE)
summary(mpg.merge)
## manufacturer model displ year
## Length:234 Length:234 Min. :1.600 Min. :1999
## Class :character Class :character 1st Qu.:2.400 1st Qu.:1999
## Mode :character Mode :character Median :3.300 Median :2004
## Mean :3.472 Mean :2004
## 3rd Qu.:4.600 3rd Qu.:2008
## Max. :7.000 Max. :2008
## cyl trans drv cty
## Min. :4.000 Length:234 Length:234 Min. : 9.00
## 1st Qu.:4.000 Class :character Class :character 1st Qu.:14.00
## Median :6.000 Mode :character Mode :character Median :17.00
## Mean :5.889 Mean :16.86
## 3rd Qu.:8.000 3rd Qu.:19.00
## Max. :8.000 Max. :35.00
## hwy fl class century
## Min. :12.00 Length:234 Length:234 Min. :20.0
## 1st Qu.:18.00 Class :character Class :character 1st Qu.:20.0
## Median :24.00 Mode :character Mode :character Median :20.5
## Mean :23.44 Mean :20.5
## 3rd Qu.:27.00 3rd Qu.:21.0
## Max. :44.00 Max. :21.0
## cyl6 mean.displ
## Length:234 Min. :1.711
## Class :character 1st Qu.:2.457
## Mode :character Median :3.269
## Mean :3.472
## 3rd Qu.:4.395
## Max. :5.400
히스토그램
# Histogram of displ; data and aesthetics are declared once in ggplot() and
# inherited by the histogram layer.
ggplot(data = mpg, mapping = aes(x = displ)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
밀도 그래프
# Density curve of displ; data and aesthetics are declared once in ggplot() and
# inherited by the density layer.
ggplot(data = mpg, mapping = aes(x = displ)) +
  geom_density()
산점도에 회귀선 추가 (95% 신뢰구간)
# Scatter plot of cty against displ with a fitted linear-model line on top.
# Both layers inherit the data and aesthetics declared in ggplot(), so they are
# written only once.
ggplot(data = mpg, mapping = aes(x = displ, y = cty)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
산점도에 색 추가
# cyl mapped as a number: ggplot2 colours the points on a continuous scale.
ggplot(mpg, aes(x = displ, y = cty)) +
  geom_point(aes(group = cyl, colour = cyl))

# cyl converted to a factor: each cylinder count gets its own discrete colour.
ggplot(mpg, aes(x = displ, y = cty)) +
  geom_point(aes(group = factor(cyl), colour = factor(cyl)))

# Plain scatter plot with no colour mapping.
ggplot(mpg, aes(x = displ, y = cty)) +
  geom_point()
점, 텍스트 추가
ggplot(data = mpg, mapping = aes(x = displ, y =cty)) +
geom_point() +
annotate(geom = "point", x = c(3, 3.5), y = c(25, 30), colour = "red") +
annotate(geom = "text", x = 5, y = 25, label = "added red points")
그래프 나눠 그리기
ggplot(data = mpg, mapping = aes(x = displ, y = cty)) +
geom_point() +
facet_wrap(~cyl)
ggplot(data = mpg, mapping = aes(x = displ, y = cty)) +
geom_point() +
facet_grid(.~cyl)
ggplot(data = mpg, mapping = aes(x = displ, y = cty)) +
geom_point() +
facet_grid(cyl~.)
내장 데이터인 mtcars 이용
# Copy the built-in mtcars data set into a workspace variable of the same name.
mtcars <- mtcars
# Open the help page for mtcars.
?mtcars
# Fit a binomial GLM (logit link by default, i.e. logistic regression) with vs
# as the response and mpg as the only predictor.
result1 <- glm(vs ~ mpg, family = binomial, data = mtcars)
# Print the coefficient table and deviance statistics of the fit.
summary(result1)
##
## Call:
## glm(formula = vs ~ mpg, family = binomial, data = mtcars)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.8331 3.1623 -2.793 0.00522 **
## mpg 0.4304 0.1584 2.717 0.00659 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 43.860 on 31 degrees of freedom
## Residual deviance: 25.533 on 30 degrees of freedom
## AIC: 29.533
##
## Number of Fisher Scoring iterations: 6
# Fit a binomial GLM of vs on two predictors, mpg and am, using the mtcars data.
result2 <- glm(vs ~ mpg + am, family = binomial, data = mtcars)
# Print the coefficient table and deviance statistics of the two-predictor fit.
summary(result2)
##
## Call:
## glm(formula = vs ~ mpg + am, family = binomial, data = mtcars)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.7051 4.6252 -2.747 0.00602 **
## mpg 0.6809 0.2524 2.698 0.00697 **
## am -3.0073 1.5995 -1.880 0.06009 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 43.860 on 31 degrees of freedom
## Residual deviance: 20.646 on 29 degrees of freedom
## AIC: 26.646
##
## Number of Fisher Scoring iterations: 6