library("dplyr")
## Warning: 패키지 'dplyr'는 R 버전 4.2.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: 패키지 'ggplot2'는 R 버전 4.2.3에서 작성되었습니다
# Load the `diamonds` dataset shipped with ggplot2 into the workspace.
data("diamonds", package = "ggplot2")
# First three "Ideal"-cut diamonds whose price is not exactly 1000.
# Comma-separated conditions in filter() are combined with AND.
diamonds %>%
  filter(price != 1000, cut == "Ideal") %>%
  head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.23 Ideal J VS1 62.8 56 340 3.93 3.9 2.46
## 3 0.31 Ideal J SI2 62.2 54 344 4.35 4.37 2.71
# Number of diamonds per cut level.
count(diamonds, cut)
## # A tibble: 5 × 2
## cut n
## <ord> <int>
## 1 Fair 1610
## 2 Good 4906
## 3 Very Good 12082
## 4 Premium 13791
## 5 Ideal 21551
# Number of diamonds per color level.
count(diamonds, color)
## # A tibble: 7 × 2
## color n
## <ord> <int>
## 1 D 6775
## 2 E 9797
## 3 F 9542
## 4 G 11292
## 5 H 8304
## 6 I 5422
## 7 J 2808
# Number of diamonds per clarity level.
count(diamonds, clarity)
## # A tibble: 8 × 2
## clarity n
## <ord> <int>
## 1 I1 741
## 2 SI2 9194
## 3 SI1 13065
## 4 VS2 12258
## 5 VS1 8171
## 6 VVS2 5066
## 7 VVS1 3655
## 8 IF 1790
# Base-R frequency table of the cut column (same counts as count(cut)).
table(diamonds[["cut"]])
##
## Fair Good Very Good Premium Ideal
## 1610 4906 12082 13791 21551
# First three "Ideal"-cut, color "E" diamonds whose price is not exactly 1000.
# Comma-separated conditions in filter() are combined with AND.
diamonds %>%
  filter(price != 1000, cut == "Ideal", color == "E") %>%
  head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.26 Ideal E VVS2 62.9 58 554 4.02 4.06 2.54
## 3 0.7 Ideal E SI1 62.5 57 2757 5.7 5.72 3.57
# First three diamonds whose carat lies outside the closed range [1, 5],
# i.e. carat < 1 or carat > 5.
diamonds %>%
  filter(!(carat >= 1 & carat <= 5)) %>%
  head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
# First three diamonds whose cut is either "Ideal" or "Good".
# Factor level names are case-sensitive: the level is spelled "Ideal", so the
# previous lowercase "ideal" never matched and only "Good" rows were returned.
diamonds %>%
  filter(cut %in% c("Ideal", "Good")) %>%
  head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 3 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
# Rows that hold the maximum depth or the minimum price, shown with only the
# carat, depth and price columns.
diamonds %>%
  filter(depth == max(depth) | price == min(price)) %>%
  select(carat, depth, price)
## # A tibble: 4 × 3
## carat depth price
## <dbl> <dbl> <int>
## 1 0.23 61.5 326
## 2 0.21 59.8 326
## 3 0.5 79 2579
## 4 0.5 79 2579
library(tidyverse)
## Warning: 패키지 'tidyverse'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'tibble'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'tidyr'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'readr'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'purrr'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'stringr'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'forcats'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'lubridate'는 R 버전 4.2.3에서 작성되었습니다
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Add price-per-carat (`ratio`) and twice that value (`Double`); show 3 rows.
diamonds %>%
  mutate(ratio = price / carat) %>%
  mutate(Double = ratio * 2) %>%
  head(3)
## # A tibble: 3 × 12
## carat cut color clarity depth table price x y z ratio Double
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 1417. 2835.
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 1552. 3105.
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 1422. 2843.
# Overall mean price; the unnamed expression becomes the column name.
summarise(diamonds, mean(price))
## # A tibble: 1 × 1
## `mean(price)`
## <dbl>
## 1 3933.
# One-row summary: mean price, median price and mean carat.
diamonds %>%
  summarise(
    avgprice = mean(price),
    MedianPrice = median(price),
    avgcarat = mean(carat)
  )
## # A tibble: 1 × 3
## avgprice MedianPrice avgcarat
## <dbl> <dbl> <dbl>
## 1 3933. 2401 0.798
# Per-cut mean price and total carat weight.
diamonds %>%
  group_by(cut) %>%
  summarise(
    AvgPrice = mean(price),
    SumCarat = sum(carat)
  )
## # A tibble: 5 × 3
## cut AvgPrice SumCarat
## <ord> <dbl> <dbl>
## 1 Fair 4359. 1684.
## 2 Good 3929. 4166.
## 3 Very Good 3982. 9743.
## 4 Premium 4584. 12301.
## 5 Ideal 3458. 15147.
# Per-cut row count `n`, the grand total `tota`, and each cut's share in
# percent `pct`.
diamonds %>%
  count(cut) %>%
  mutate(tota = sum(n), pct = n / tota * 100)
## # A tibble: 5 × 4
## cut n tota pct
## <ord> <int> <int> <dbl>
## 1 Fair 1610 53940 2.98
## 2 Good 4906 53940 9.10
## 3 Very Good 12082 53940 22.4
## 4 Premium 13791 53940 25.6
## 5 Ideal 21551 53940 40.0
# Min, quartiles and max of price (default quantile probabilities).
quantile(diamonds[["price"]])
## 0% 25% 50% 75% 100%
## 326.00 950.00 2401.00 5324.25 18823.00
# Label every diamond with a price class based on the quartiles of `price`:
#   >= 75% quantile -> "best", >= median -> "good",
#   >= 25% quantile -> "normal", below that -> "bad".
# The cut points are computed from the data (950, 2401 and 5324.25 for
# `diamonds`) rather than pasted in from the printed quantile() output, so
# they cannot drift out of sync with the data. case_when() checks conditions
# top to bottom, so each row gets the highest class it qualifies for; a
# missing price matches no condition and stays NA.
diamonds1 <- diamonds %>%
  mutate(
    price_class = case_when(
      price >= quantile(price, 0.75, names = FALSE, na.rm = TRUE) ~ "best",
      price >= quantile(price, 0.50, names = FALSE, na.rm = TRUE) ~ "good",
      price >= quantile(price, 0.25, names = FALSE, na.rm = TRUE) ~ "normal",
      price < quantile(price, 0.25, names = FALSE, na.rm = TRUE) ~ "bad"
    )
  )
# Frequency of each price class.
table(diamonds1[["price_class"]])
##
## bad best good normal
## 13483 13485 13496 13476
# Mean price per cut, most expensive cut first.
diamonds %>%
  group_by(cut) %>%
  summarise(AvgPrice = mean(price)) %>%
  arrange(desc(AvgPrice))
## # A tibble: 5 × 2
## cut AvgPrice
## <ord> <dbl>
## 1 Premium 4584.
## 2 Fair 4359.
## 3 Very Good 3982.
## 4 Good 3929.
## 5 Ideal 3458.