7월 12일 실습

"2023-07-12"

## [1] "2023-07-12"

# Step 1: remove every (non-hidden) object from the current environment.
# NOTE(review): this also wipes the objects of anyone who sources this file;
# consider restarting the R session instead of clearing it from a script.
rm(list=ls()) #1
# Step 2: attach dplyr, which supplies %>%, rename(), count(), select(),
# slice() and filter() used further down.
library(dplyr) #2

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# NOTE(review): no psych function is called in the visible part of this script.
library(psych)
# NOTE(review): hard-coded absolute Windows path; this call errors on machines
# where C:/data does not exist. Nothing visible in this script reads from it.
setwd("C:/data")
# Print the working directory to confirm the change took effect.
getwd()

## [1] "C:/data"

# Load the built-in iris dataset once, then show its last six rows.
data("iris")
tail(iris)

##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica

# Show the first ten rows of iris.
head(iris, n = 10)

##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1           5.1         3.5          1.4         0.2  setosa
## 2           4.9         3.0          1.4         0.2  setosa
## 3           4.7         3.2          1.3         0.2  setosa
## 4           4.6         3.1          1.5         0.2  setosa
## 5           5.0         3.6          1.4         0.2  setosa
## 6           5.4         3.9          1.7         0.4  setosa
## 7           4.6         3.4          1.4         0.3  setosa
## 8           5.0         3.4          1.5         0.2  setosa
## 9           4.4         2.9          1.4         0.2  setosa
## 10          4.9         3.1          1.5         0.1  setosa

# Build a small example data frame.
# Column names: gender, bloodtype, height, weight
# Row names: 1, 2, 3 (the defaults)
gender <- c("F", "M", "F")
bloodtype <- c("AB", "O", "B")
height <- c(170, 175, 165)
weight <- c(70, 65, 55)
df <- data.frame(gender, bloodtype, height, weight)


# Second column selected by position; a single column comes back as a vector.
head(df[, 2])

## [1] "AB" "O"  "B"

# The same column selected by name.
head(df[, "bloodtype"])

## [1] "AB" "O"  "B"

# Columns 2 and 3 via an explicit index vector; the result is a data frame.
head(df[, c(2, 3)])

##   bloodtype height
## 1        AB    170
## 2         O    175
## 3         B    165

# The same two columns via a range.
head(df[, 2:3])

##   bloodtype height
## 1        AB    170
## 2         O    175
## 3         B    165

# Body-mass index = weight (kg) / height (m)^2.
# Assumes height is recorded in centimetres and weight in kilograms --
# TODO confirm the units. A plain height / weight ratio is not a BMI.
# Columns are read from df itself so the result does not depend on the
# free-standing vectors still being in scope.
df$bmi <- df$weight / (df$height / 100)^2
# Mean of the two measurements: two values are summed, so divide by 2.
df$avg <- (df$height + df$weight) / 2


# Rename the second column from "bloodtype" to "blood".
colnames(df)[2] <- "blood"


library(ggplot2)

## 
## 다음의 패키지를 부착합니다: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

# Load the diamonds dataset.
data("diamonds")

# %>% is the pipe operator (keyboard shortcut: Ctrl+Shift+M). It hands the
# value on its left to the function on its right.
diamonds %>% dim()

## [1] 53940    10

# Preview the first six rows of diamonds.
diamonds %>% head()

## # A tibble: 6 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48

# The same dimensions query written without the pipe: rows, then columns.
dim(diamonds)

## [1] 53940    10

# List the column names of diamonds.
names(diamonds)

##  [1] "carat"   "cut"     "color"   "clarity" "depth"   "table"   "price"  
##  [8] "x"       "y"       "z"

# colnames() returns the same ten names as names() here (compare the outputs).
colnames(diamonds)

##  [1] "carat"   "cut"     "color"   "clarity" "depth"   "table"   "price"  
##  [8] "x"       "y"       "z"

# Copy diamonds with clarity renamed to c and price renamed to p
# (rename() takes new_name = old_name), then preview the first three rows.
diamonds1 <- diamonds %>%
  rename(c = clarity, p = price)
head(diamonds1, n = 3)

## # A tibble: 3 × 10
##   carat cut     color c     depth table     p     x     y     z
##   <dbl> <ord>   <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal   E     SI2    61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium E     SI1    59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good    E     VS1    56.9    65   327  4.05  4.07  2.31

# Count the rows in each cut category (direct function-call form).
count(diamonds,cut)

## # A tibble: 5 × 2
##   cut           n
##   <ord>     <int>
## 1 Fair       1610
## 2 Good       4906
## 3 Very Good 12082
## 4 Premium   13791
## 5 Ideal     21551

# The same count written with the pipe; the result matches the direct call.
diamonds %>% count(cut)

## # A tibble: 5 × 2
##   cut           n
##   <ord>     <int>
## 1 Fair       1610
## 2 Good       4906
## 3 Very Good 12082
## 4 Premium   13791
## 5 Ideal     21551

## <ord>는 ordered factor(순서형 요인)를 의미 ==> 수준(level) 사이에 순서가 있는 범주형 데이터.


# select(): keep columns by position -- here columns 8 through 10 (x, y, z).
diamonds %>%
  select(8:10)

## # A tibble: 53,940 × 3
##        x     y     z
##    <dbl> <dbl> <dbl>
##  1  3.95  3.98  2.43
##  2  3.89  3.84  2.31
##  3  4.05  4.07  2.31
##  4  4.2   4.23  2.63
##  5  4.34  4.35  2.75
##  6  3.94  3.96  2.48
##  7  3.95  3.98  2.47
##  8  4.07  4.11  2.53
##  9  3.87  3.78  2.49
## 10  4     4.05  2.39
## # ℹ 53,930 more rows

# Keep columns 1, 3 and 5 through 8 by position.
# NOTE(review): this reassigns `df`, replacing the small example data frame
# built earlier in the script.
df <- diamonds %>%
  select(1, 3, 5:8)
# Keep every column except carat and price.
df2 <- diamonds %>%
  select(-carat, -price)

# slice(): pick rows by position -- here rows 1 through 5. head() changes
# nothing afterwards because only five rows remain.
diamonds %>%
  slice(1:5) %>%
  head()

## # A tibble: 5 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal   E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good    E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good    J     SI2      63.3    58   335  4.34  4.35  2.75

# A negative index drops rows: remove row 1, then show the first six rows
# of what is left.
diamonds %>%
  slice(-1) %>%
  head()

## # A tibble: 6 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 2  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 3  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 4  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 5  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
## 6  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47

# filter(): keep rows matching a condition -- here cut equal to "Good";
# show the first three matches.
diamonds %>%
  filter(cut == "Good") %>%
  head(3)

## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Good  E     VS1      56.9    65   327  4.05  4.07  2.31
## 2  0.31 Good  J     SI2      63.3    58   335  4.34  4.35  2.75
## 3  0.3  Good  J     SI1      64      55   339  4.25  4.28  2.73

# max(): the highest price in the dataset.
max(diamonds[["price"]])

## [1] 18823

# Rows whose price equals the maximum, computed inside filter().
diamonds %>%
  filter(price == max(price))

## # A tibble: 1 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  2.29 Premium I     VS2      60.8    60 18823   8.5  8.47  5.16

# The same row selected with the maximum written out as a literal (18823).
diamonds %>%
  filter(price == 18823)

## # A tibble: 1 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  2.29 Premium I     VS2      60.8    60 18823   8.5  8.47  5.16

# Inequality filter: keep rows whose cut is NOT "Premium"; show the first three.
diamonds %>%
  filter(cut != "Premium") %>%
  head(3)

## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.23 Good  E     VS1      56.9    65   327  4.05  4.07  2.31
## 3  0.31 Good  J     SI2      63.3    58   335  4.34  4.35  2.75

# Range comparisons (<= and >= work the same way): here, rows with a price of
# at least 1000; show the first three.
diamonds %>%
  filter(price >= 1000) %>%
  head(3)

## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.7  Ideal E     SI1      62.5    57  2757  5.7   5.72  3.57
## 2  0.86 Fair  E     SI2      55.1    69  2757  6.45  6.33  3.52
## 3  0.7  Ideal G     VS2      61.6    56  2757  5.7   5.67  3.5

# Rows whose price is anything other than 1000; show the first three.
diamonds %>%
  filter(price != 1000) %>%
  head(3)

## # A tibble: 3 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal   E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good    E     VS1      56.9    65   327  4.05  4.07  2.31

# Rows whose price is exactly 1000; show the first three.
diamonds %>%
  filter(price == 1000) %>%
  head(3)

## # A tibble: 3 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.38 Very Good E     VVS2     61.8    56  1000  4.66  4.68  2.88
## 2  0.39 Very Good F     VS1      57.1    61  1000  4.86  4.91  2.79
## 3  0.38 Very Good E     VS1      61.5    58  1000  4.64  4.69  2.87

7월 12일 실습

방지원

2023-07-12