library(dplyr) #2 
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# Load the diamonds dataset that ships with ggplot2 into the workspace.
data("diamonds", package = "ggplot2")
# First three Ideal-cut diamonds whose price is anything other than 1000.
# Comma-separated conditions in filter() are combined with AND.
diamonds %>%
  filter(cut == "Ideal", price != 1000) %>%
  head(3)
## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.23 Ideal J     VS1      62.8    56   340  3.93  3.9   2.46
## 3  0.31 Ideal J     SI2      62.2    54   344  4.35  4.37  2.71
# First three colour-E diamonds whose price is anything other than 1000.
diamonds %>%
  filter(color == "E", price != 1000) %>%
  head(3)
## # A tibble: 3 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal   E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good    E     VS1      56.9    65   327  4.05  4.07  2.31
# First three diamonds whose weight lies outside the closed range [1, 5]
# carats, i.e. lighter than 1 or heavier than 5.
diamonds %>%
  filter(!(carat >= 1 & carat <= 5)) %>%
  head(3)
## # A tibble: 3 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal   E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good    E     VS1      56.9    65   327  4.05  4.07  2.31
# First three diamonds whose cut is either "Ideal" or "Good".
diamonds %>%
  filter(cut %in% c("Ideal", "Good")) %>%
  head(3)
## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.23 Good  E     VS1      56.9    65   327  4.05  4.07  2.31
## 3  0.31 Good  J     SI2      63.3    58   335  4.34  4.35  2.75
# Rows that hit either extreme: the deepest diamond(s) or the cheapest one(s),
# reduced to the three columns of interest.
diamonds %>%
  filter(price == min(price) | depth == max(depth)) %>%
  select(carat, depth, price)
## # A tibble: 4 × 3
##   carat depth price
##   <dbl> <dbl> <int>
## 1  0.23  61.5   326
## 2  0.21  59.8   326
## 3  0.5   79    2579
## 4  0.5   79    2579
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Add price-per-carat (Ratio) and twice that value (Double); mutate() lets a
# later column refer to one created earlier in the same call.
diamonds %>%
  mutate(
    Ratio = price / carat,
    Double = Ratio * 2
  ) %>%
  head(3)
## # A tibble: 3 × 12
##   carat cut     color clarity depth table price     x     y     z Ratio Double
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1  0.23 Ideal   E     SI2      61.5    55   326  3.95  3.98  2.43 1417.  2835.
## 2  0.21 Premium E     SI1      59.8    61   326  3.89  3.84  2.31 1552.  3105.
## 3  0.23 Good    E     VS1      56.9    65   327  4.05  4.07  2.31 1422.  2843.
# Number of diamonds per cut, the overall total, and each cut's share in
# percent. count(cut) yields the same `cut`/`n` table as grouping and
# summarising with n().
diamonds %>%
  count(cut) %>%
  mutate(
    total = sum(n),
    pct = n / total * 100
  )
## # A tibble: 5 × 4
##   cut           n total   pct
##   <ord>     <int> <int> <dbl>
## 1 Fair       1610 53940  2.98
## 2 Good       4906 53940  9.10
## 3 Very Good 12082 53940 22.4 
## 4 Premium   13791 53940 25.6 
## 5 Ideal     21551 53940 40.0
# Five-number quantile summary (0%, 25%, 50%, 75%, 100%) of diamond prices.
quantile(diamonds[["price"]])
##       0%      25%      50%      75%     100% 
##   326.00   950.00  2401.00  5324.25 18823.00
# Label every diamond with the price quartile it falls into:
#   "bad"    : price below the 25% quantile
#   "normal" : from the 25% quantile up to (not including) the median
#   "good"   : from the median up to (not including) the 75% quantile
#   "best"   : at or above the 75% quantile
# The breakpoints are computed from the data instead of being hand-copied
# from printed quantile() output, so they cannot drift out of sync with it.
# right = FALSE makes each bin closed on the left, matching the original
# `>=` comparisons. The result is kept as a character column so table()
# still orders the labels alphabetically; NA prices stay NA.
diamonds <- diamonds %>%
  mutate(
    price_class = as.character(cut(
      price,
      breaks = c(
        -Inf,
        quantile(price, probs = c(0.25, 0.5, 0.75), names = FALSE, na.rm = TRUE),
        Inf
      ),
      labels = c("bad", "normal", "good", "best"),
      right = FALSE
    ))
  )
# Frequency of each price_class label.
table(diamonds[["price_class"]])
## 
##    bad   best   good normal 
##  13483  13485  13496  13476
# Mean price for each cut, listed from the most to the least expensive.
diamonds %>%
  group_by(cut) %>%
  summarise(AvgPrice = mean(price)) %>%
  arrange(desc(AvgPrice))
## # A tibble: 5 × 2
##   cut       AvgPrice
##   <ord>        <dbl>
## 1 Premium      4584.
## 2 Fair         4359.
## 3 Very Good    3982.
## 4 Good         3929.
## 5 Ideal        3458.