"2023-07-12"
## [1] "2023-07-12"
rm(list=ls()) #1 - 이때까지 저장된거 지우기
library(dplyr) #2
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
setwd("C:/data")
getwd()
## [1] "C:/data"
data(iris)
data(iris);tail(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
head(iris,10)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
#colname : gender bloodtype height weight
#rowname : 1,2,3
gender<-c("F","M","F")
bloodtype<-c("AB","O","B")
height<-c(170, 175, 165)
weight<-c(70,65,55)
df<-data.frame(gender,bloodtype,height,weight)
head(df[,2])
## [1] "AB" "O" "B"
head(df[,"bloodtype"])
## [1] "AB" "O" "B"
head(df[,c(2,3)])
## bloodtype height
## 1 AB 170
## 2 O 175
## 3 B 165
head(df[,2:3])
## bloodtype height
## 1 AB 170
## 2 O 175
## 3 B 165
df$bmi=height/weight
df$avg<-(height+weight)/3
colnames(df)[2]<-"blood"
library(ggplot2)
##
## 다음의 패키지를 부착합니다: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
data("diamonds")
#%>% : 파이프 연산자. : ctrl+shift+M
diamonds %>% dim
## [1] 53940 10
head(diamonds)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
dim(diamonds)
## [1] 53940 10
names(diamonds)
## [1] "carat" "cut" "color" "clarity" "depth" "table" "price"
## [8] "x" "y" "z"
colnames(diamonds)
## [1] "carat" "cut" "color" "clarity" "depth" "table" "price"
## [8] "x" "y" "z"
diamonds1<-diamonds %>% rename(c=clarity,p=price)
head(diamonds1,3)
## # A tibble: 3 × 10
## carat cut color c depth table p x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
count(diamonds,cut)
## # A tibble: 5 × 2
## cut n
## <ord> <int>
## 1 Fair 1610
## 2 Good 4906
## 3 Very Good 12082
## 4 Premium 13791
## 5 Ideal 21551
diamonds %>% count(cut)
## # A tibble: 5 × 2
## cut n
## <ord> <int>
## 1 Fair 1610
## 2 Good 4906
## 3 Very Good 12082
## 4 Premium 13791
## 5 Ideal 21551
##ord는 ordianry의미 ==> 카테고리 데이터.
#select 메서드
diamonds %>% select(8:10)
## # A tibble: 53,940 × 3
## x y z
## <dbl> <dbl> <dbl>
## 1 3.95 3.98 2.43
## 2 3.89 3.84 2.31
## 3 4.05 4.07 2.31
## 4 4.2 4.23 2.63
## 5 4.34 4.35 2.75
## 6 3.94 3.96 2.48
## 7 3.95 3.98 2.47
## 8 4.07 4.11 2.53
## 9 3.87 3.78 2.49
## 10 4 4.05 2.39
## # ℹ 53,930 more rows
df<-diamonds %>% select(1,3,5:8)
df2<-diamonds %>% select(-carat,-price)
#slice 메서드
head(diamonds %>% slice(1:5))
## # A tibble: 5 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
head(diamonds %>% slice(-1))
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 2 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 3 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 4 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 5 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## 6 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
diamonds %>% filter(cut=="Good") %>% head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 2 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 3 0.3 Good J SI1 64 55 339 4.25 4.28 2.73
#max 메서드
max(diamonds$price)
## [1] 18823
diamonds %>% filter(price == max(price))
## # A tibble: 1 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 2.29 Premium I VS2 60.8 60 18823 8.5 8.47 5.16
diamonds %>% filter(price == 18823)
## # A tibble: 1 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 2.29 Premium I VS2 60.8 60 18823 8.5 8.47 5.16
#비교값이 같은 데이터 추출
diamonds %>% filter(cut!="Premium") %>% head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 3 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
#비교값이 작거나 같은 또는 크거나 같은 데이터 추출
diamonds %>% filter(price>=1000) %>% head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.7 Ideal E SI1 62.5 57 2757 5.7 5.72 3.57
## 2 0.86 Fair E SI2 55.1 69 2757 6.45 6.33 3.52
## 3 0.7 Ideal G VS2 61.6 56 2757 5.7 5.67 3.5
diamonds %>% filter(price != 1000) %>% head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
diamonds %>% filter(price == 1000) %>% head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.38 Very Good E VVS2 61.8 56 1000 4.66 4.68 2.88
## 2 0.39 Very Good F VS1 57.1 61 1000 4.86 4.91 2.79
## 3 0.38 Very Good E VS1 61.5 58 1000 4.64 4.69 2.87