"2023-07-12"
## [1] "2023-07-12"
# Session setup ----
# The workspace is deliberately NOT wiped with rm(list = ls()): that call only
# removes user objects (attached packages and options survive), so it does not
# give a clean session, and it silently deletes the objects of anyone who
# sources this script. Restart R to get a clean session instead.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
# NOTE(review): hard-coded, machine-specific working directory; setwd() raises
# an error when the directory does not exist. Kept because code outside this
# view may read files relative to it -- confirm, then prefer project-relative
# paths.
setwd("C:/data")
getwd()
## [1] "C:/data"
# Load the built-in iris data set, then preview its last and first rows.
data(iris)
tail(iris, n = 6)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica
head(iris, n = 10)
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1           5.1         3.5          1.4         0.2  setosa
## 2           4.9         3.0          1.4         0.2  setosa
## 3           4.7         3.2          1.3         0.2  setosa
## 4           4.6         3.1          1.5         0.2  setosa
## 5           5.0         3.6          1.4         0.2  setosa
## 6           5.4         3.9          1.7         0.4  setosa
## 7           4.6         3.4          1.4         0.3  setosa
## 8           5.0         3.4          1.5         0.2  setosa
## 9           4.4         2.9          1.4         0.2  setosa
## 10          4.9         3.1          1.5         0.1  setosa
# Build a small demo data frame.
#   column names: gender, bloodtype, height, weight
#   row names   : 1, 2, 3 (the defaults)
gender <- c("F", "M", "F")
bloodtype <- c("AB", "O", "B")
height <- c(170, 175, 165) # assumed to be centimetres -- TODO confirm
weight <- c(70, 65, 55) # assumed to be kilograms -- TODO confirm
df <- data.frame(gender, bloodtype, height, weight)


# Column extraction: one index or name returns a vector, several return a
# data frame.
head(df[, 2])
## [1] "AB" "O"  "B"
head(df[, "bloodtype"])
## [1] "AB" "O"  "B"
head(df[, c(2, 3)])
##   bloodtype height
## 1        AB    170
## 2         O    175
## 3         B    165
head(df[, 2:3])
##   bloodtype height
## 1        AB    170
## 2         O    175
## 3         B    165
# Derived columns are computed from df's own columns, so they stay correct
# even if the free-standing vectors are changed or removed later.
# BMI is weight (kg) divided by squared height (m); the earlier height / weight
# ratio was not a BMI. Height is converted from centimetres to metres.
df$bmi <- df$weight / (df$height / 100)^2
# The mean of two values divides by 2 (the earlier version divided by 3).
df$avg <- (df$height + df$weight) / 2


# Rename the second column ("bloodtype") to "blood".
colnames(df)[2] <- "blood"


# Attach ggplot2 and load the diamonds data set into the workspace.
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
data(diamonds)

# %>% is the pipe operator (keyboard shortcut: Ctrl+Shift+M); it feeds the
# value on its left into the function on its right.
diamonds %>% dim()
## [1] 53940    10
# First look at diamonds: leading rows, dimensions, and column names
# (names() and colnames() report the same thing here).
diamonds %>% head()
## # A tibble: 6 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
diamonds %>% dim()
## [1] 53940    10
diamonds %>% names()
##  [1] "carat"   "cut"     "color"   "clarity" "depth"   "table"   "price"  
##  [8] "x"       "y"       "z"
diamonds %>% colnames()
##  [1] "carat"   "cut"     "color"   "clarity" "depth"   "table"   "price"  
##  [8] "x"       "y"       "z"
# rename(): new_name = old_name. Here clarity becomes c and price becomes p.
diamonds1 <- rename(diamonds, c = clarity, p = price)
diamonds1 %>% head(3)
## # A tibble: 3 × 10
##   carat cut     color c     depth table     p     x     y     z
##   <dbl> <ord>   <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal   E     SI2    61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium E     SI1    59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good    E     VS1    56.9    65   327  4.05  4.07  2.31
# count(): number of rows per level of cut. The piped form and the plain call
# give the same table.
diamonds %>% count(cut)
## # A tibble: 5 × 2
##   cut           n
##   <ord>     <int>
## 1 Fair       1610
## 2 Good       4906
## 3 Very Good 12082
## 4 Premium   13791
## 5 Ideal     21551
count(diamonds, cut)
## # A tibble: 5 × 2
##   cut           n
##   <ord>     <int>
## 1 Fair       1610
## 2 Good       4906
## 3 Very Good 12082
## 4 Premium   13791
## 5 Ideal     21551
## <ord> stands for "ordered": an ordered factor, i.e. ordinal categorical data.


# select(): keep or drop columns, by position or by name.
select(diamonds, 8:10)
## # A tibble: 53,940 × 3
##        x     y     z
##    <dbl> <dbl> <dbl>
##  1  3.95  3.98  2.43
##  2  3.89  3.84  2.31
##  3  4.05  4.07  2.31
##  4  4.2   4.23  2.63
##  5  4.34  4.35  2.75
##  6  3.94  3.96  2.48
##  7  3.95  3.98  2.47
##  8  4.07  4.11  2.53
##  9  3.87  3.78  2.49
## 10  4     4.05  2.39
## # ℹ 53,930 more rows
# Columns 1, 3 and 5 through 8. This reuses the name df, replacing whatever
# df held before.
df <- select(diamonds, 1, 3, 5:8)
# A minus sign drops columns: everything except carat and price.
df2 <- select(diamonds, -c(carat, price))

# slice(): pick rows by position. Positive indices keep rows, negative ones
# drop them.
diamonds %>% slice(1:5) %>% head()
## # A tibble: 5 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal   E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good    E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good    J     SI2      63.3    58   335  4.34  4.35  2.75
diamonds %>% slice(-1) %>% head()
## # A tibble: 6 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 2  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 3  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 4  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 5  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
## 6  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47
# filter(): keep the rows that satisfy a condition (here cut equal to "Good").
filter(diamonds, cut == "Good") %>% head(3)
## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Good  E     VS1      56.9    65   327  4.05  4.07  2.31
## 2  0.31 Good  J     SI2      63.3    58   335  4.34  4.35  2.75
## 3  0.3  Good  J     SI1      64      55   339  4.25  4.28  2.73
# max(): largest value of a vector, then the row(s) that carry that price.
max(diamonds[["price"]])
## [1] 18823
filter(diamonds, price == max(price))
## # A tibble: 1 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  2.29 Premium I     VS2      60.8    60 18823   8.5  8.47  5.16
# Same row, selected with the literal maximum printed above.
filter(diamonds, price == 18823)
## # A tibble: 1 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  2.29 Premium I     VS2      60.8    60 18823   8.5  8.47  5.16
# Rows whose value differs from the comparison value (!=): cut is not "Premium".
filter(diamonds, cut != "Premium") %>% head(3)
## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.23 Good  E     VS1      56.9    65   327  4.05  4.07  2.31
## 3  0.31 Good  J     SI2      63.3    58   335  4.34  4.35  2.75
# Numeric comparisons against 1000: greater than or equal (>=), not equal (!=),
# and equal (==).
filter(diamonds, price >= 1000) %>% head(3)
## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.7  Ideal E     SI1      62.5    57  2757  5.7   5.72  3.57
## 2  0.86 Fair  E     SI2      55.1    69  2757  6.45  6.33  3.52
## 3  0.7  Ideal G     VS2      61.6    56  2757  5.7   5.67  3.5
filter(diamonds, price != 1000) %>% head(3)
## # A tibble: 3 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal   E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good    E     VS1      56.9    65   327  4.05  4.07  2.31
filter(diamonds, price == 1000) %>% head(3)
## # A tibble: 3 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.38 Very Good E     VVS2     61.8    56  1000  4.66  4.68  2.88
## 2  0.39 Very Good F     VS1      57.1    61  1000  4.86  4.91  2.79
## 3  0.38 Very Good E     VS1      61.5    58  1000  4.64  4.69  2.87