library("dplyr")
## Warning: 패키지 'dplyr'는 R 버전 4.2.3에서 작성되었습니다
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: 패키지 'ggplot2'는 R 버전 4.2.3에서 작성되었습니다
# Load the diamonds data set into the workspace.
data("diamonds")

# Preview the first three "Ideal"-cut diamonds whose price is not exactly 1000.
# Comma-separated filter() conditions are combined with a logical AND.
diamonds %>%
  filter(price != 1000, cut == "Ideal") %>%
  head(3)
## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.23 Ideal J     VS1      62.8    56   340  3.93  3.9   2.46
## 3  0.31 Ideal J     SI2      62.2    54   344  4.35  4.37  2.71
# Number of diamonds at each cut level.
count(diamonds, cut)
## # A tibble: 5 × 2
##   cut           n
##   <ord>     <int>
## 1 Fair       1610
## 2 Good       4906
## 3 Very Good 12082
## 4 Premium   13791
## 5 Ideal     21551
# Number of diamonds at each color grade.
count(diamonds, color)
## # A tibble: 7 × 2
##   color     n
##   <ord> <int>
## 1 D      6775
## 2 E      9797
## 3 F      9542
## 4 G     11292
## 5 H      8304
## 6 I      5422
## 7 J      2808
# Number of diamonds at each clarity grade.
count(diamonds, clarity)
## # A tibble: 8 × 2
##   clarity     n
##   <ord>   <int>
## 1 I1        741
## 2 SI2      9194
## 3 SI1     13065
## 4 VS2     12258
## 5 VS1      8171
## 6 VVS2     5066
## 7 VVS1     3655
## 8 IF       1790
# Base-R frequency table of the cut column.
table(diamonds[["cut"]])
## 
##      Fair      Good Very Good   Premium     Ideal 
##      1610      4906     12082     13791     21551
# First three "Ideal"-cut, color-"E" diamonds whose price is not exactly 1000.
diamonds %>%
  filter(price != 1000, cut == "Ideal", color == "E") %>%
  head(3)
## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.26 Ideal E     VVS2     62.9    58   554  4.02  4.06  2.54
## 3  0.7  Ideal E     SI1      62.5    57  2757  5.7   5.72  3.57
# First three diamonds whose carat falls outside the closed range [1, 5],
# i.e. lighter than 1 carat or heavier than 5 carats.
diamonds %>%
  filter(!between(carat, 1, 5)) %>%
  head(3)
## # A tibble: 3 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal   E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good    E     VS1      56.9    65   327  4.05  4.07  2.31
# First three diamonds whose cut is either "Ideal" or "Good".
# Level names are case-sensitive: the level is spelled "Ideal", and a
# lower-case "ideal" matches nothing, so %in% would drop those rows silently.
diamonds %>%
  filter(cut %in% c("Ideal", "Good")) %>%
  head(3)
## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.23 Good  E     VS1      56.9    65   327  4.05  4.07  2.31
## 3  0.31 Good  J     SI2      63.3    58   335  4.34  4.35  2.75
# Rows that have the greatest depth or the lowest price in the data set,
# shown with only the carat, depth and price columns.
diamonds %>%
  filter(depth == max(depth) | price == min(price)) %>%
  select(carat, depth, price)
## # A tibble: 4 × 3
##   carat depth price
##   <dbl> <dbl> <int>
## 1  0.23  61.5   326
## 2  0.21  59.8   326
## 3  0.5   79    2579
## 4  0.5   79    2579
library(tidyverse)
## Warning: 패키지 'tidyverse'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'tibble'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'tidyr'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'readr'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'purrr'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'stringr'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'forcats'는 R 버전 4.2.3에서 작성되었습니다
## Warning: 패키지 'lubridate'는 R 버전 4.2.3에서 작성되었습니다
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Add price per carat (ratio) and twice that value (Double); preview 3 rows.
diamonds %>%
  mutate(ratio = price / carat) %>%
  mutate(Double = 2 * ratio) %>%
  head(3)
## # A tibble: 3 × 12
##   carat cut     color clarity depth table price     x     y     z ratio Double
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1  0.23 Ideal   E     SI2      61.5    55   326  3.95  3.98  2.43 1417.  2835.
## 2  0.21 Premium E     SI1      59.8    61   326  3.89  3.84  2.31 1552.  3105.
## 3  0.23 Good    E     VS1      56.9    65   327  4.05  4.07  2.31 1422.  2843.
# Overall mean price. The summary is left unnamed, so the result column is
# labelled with the expression itself.
summarise(diamonds, mean(price))
## # A tibble: 1 × 1
##   `mean(price)`
##           <dbl>
## 1         3933.
# One-row summary: mean price, median price and mean carat.
summarise(
  diamonds,
  avgprice = mean(price),
  MedianPrice = median(price),
  avgcarat = mean(carat)
)
## # A tibble: 1 × 3
##   avgprice MedianPrice avgcarat
##      <dbl>       <dbl>    <dbl>
## 1    3933.        2401    0.798
# Mean price and total carat weight for each cut level.
summarise(
  group_by(diamonds, cut),
  AvgPrice = mean(price),
  SumCarat = sum(carat)
)
## # A tibble: 5 × 3
##   cut       AvgPrice SumCarat
##   <ord>        <dbl>    <dbl>
## 1 Fair         4359.    1684.
## 2 Good         3929.    4166.
## 3 Very Good    3982.    9743.
## 4 Premium      4584.   12301.
## 5 Ideal        3458.   15147.
# Share of each cut level: row count (n), overall row count (total) and the
# percentage of all diamonds that fall into that cut (pct).
# count() is shorthand for group_by() followed by summarise(n = n()).
diamonds %>%
  count(cut) %>%
  mutate(total = sum(n), pct = n / total * 100)
## # A tibble: 5 × 4
##   cut           n total   pct
##   <ord>     <int> <int> <dbl>
## 1 Fair       1610 53940  2.98
## 2 Good       4906 53940  9.10
## 3 Very Good 12082 53940 22.4 
## 4 Premium   13791 53940 25.6 
## 5 Ideal     21551 53940 40.0
# Minimum, quartiles and maximum of price (the default probabilities, spelled
# out explicitly).
quantile(diamonds$price, probs = seq(0, 1, by = 0.25))
##       0%      25%      50%      75%     100% 
##   326.00   950.00  2401.00  5324.25 18823.00
# Label every diamond with a price tier defined by the price quartiles:
#   "best"   price >= 3rd quartile
#   "good"   price >= median
#   "normal" price >= 1st quartile
#   "bad"    everything below the 1st quartile
# The cut points are computed from the data instead of being typed in by hand,
# so they always agree with quantile(diamonds$price). local() keeps the helper
# vector out of the global workspace.
diamonds1 <- local({
  price_breaks <- quantile(
    diamonds$price,
    probs = c(0.25, 0.50, 0.75),
    names = FALSE
  )
  diamonds %>%
    mutate(
      price_class = case_when(
        # A missing price stays missing rather than falling through to "bad".
        is.na(price) ~ NA_character_,
        price >= price_breaks[3] ~ "best",
        price >= price_breaks[2] ~ "good",
        price >= price_breaks[1] ~ "normal",
        TRUE ~ "bad"
      )
    )
})

# Number of diamonds in each price tier.
table(diamonds1$price_class)
## 
##    bad   best   good normal 
##  13483  13485  13496  13476
# Mean price per cut level, listed from the highest to the lowest mean.
diamonds %>%
  group_by(cut) %>%
  summarise(AvgPrice = mean(price)) %>%
  arrange(desc(AvgPrice))
## # A tibble: 5 × 2
##   cut       AvgPrice
##   <ord>        <dbl>
## 1 Premium      4584.
## 2 Fair         4359.
## 3 Very Good    3982.
## 4 Good         3929.
## 5 Ideal        3458.