library(dplyr) #2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Load the `diamonds` example dataset into the workspace.
data("diamonds")

# Preview the first three "Ideal"-cut diamonds whose price is not exactly 1000.
# Comma-separated conditions in filter() are combined with a logical AND.
diamonds %>%
  filter(price != 1000, cut == "Ideal") %>%
  head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.23 Ideal J VS1 62.8 56 340 3.93 3.9 2.46
## 3 0.31 Ideal J SI2 62.2 54 344 4.35 4.37 2.71
# Preview the first three color-"E" diamonds whose price is not exactly 1000.
diamonds %>%
  filter(price != 1000, color == "E") %>%
  head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
# Preview the first three diamonds lighter than 1 carat or heavier than 5.
diamonds %>%
  filter(carat < 1 | carat > 5) %>%
  head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
# Preview the first three diamonds whose cut is either "Ideal" or "Good".
diamonds %>%
  filter(cut %in% c("Ideal", "Good")) %>%
  head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 3 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
# Keep only carat, depth and price, then show the rows that hold either the
# maximum depth or the minimum price (ties produce several rows).
diamonds %>%
  select(carat, depth, price) %>%
  filter(depth == max(depth) | price == min(price))
## # A tibble: 4 × 3
## carat depth price
## <dbl> <dbl> <int>
## 1 0.23 61.5 326
## 2 0.21 59.8 326
## 3 0.5 79 2579
## 4 0.5 79 2579
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Add price-per-carat (`Ratio`) and twice that value (`Double`); `Double` can
# refer to `Ratio` because mutate() evaluates its arguments in order.
diamonds %>%
  mutate(
    Ratio = price / carat,
    Double = Ratio * 2
  ) %>%
  head(3)
## # A tibble: 3 × 12
## carat cut color clarity depth table price x y z Ratio Double
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 1417. 2835.
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 1552. 3105.
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 1422. 2843.
# Row count per cut, the overall row count, and each cut's share in percent.
# count(cut) is shorthand for group_by(cut) followed by summarise(n = n()).
diamonds %>%
  count(cut) %>%
  mutate(
    total = sum(n),
    pct = n / total * 100
  )
## # A tibble: 5 × 4
## cut n total pct
## <ord> <int> <int> <dbl>
## 1 Fair 1610 53940 2.98
## 2 Good 4906 53940 9.10
## 3 Very Good 12082 53940 22.4
## 4 Premium 13791 53940 25.6
## 5 Ideal 21551 53940 40.0
# Five-number summary (0/25/50/75/100 %) of diamond prices.
quantile(diamonds[["price"]])
## 0% 25% 50% 75% 100%
## 326.00 950.00 2401.00 5324.25 18823.00
# Label every diamond with a price tier based on the price quartiles:
#   price >= Q3      -> "best"
#   price >= median  -> "good"
#   price >= Q1      -> "normal"
#   otherwise        -> "bad"
# The cut points are computed from the data instead of being typed in from the
# printed quantile() output, so the tiers stay correct if the data changes.
price_breaks <- quantile(
  diamonds$price,
  probs = c(0.25, 0.5, 0.75),
  names = FALSE,
  na.rm = TRUE
)

diamonds <- diamonds %>%
  mutate(
    price_class = case_when(
      # Keep a missing price missing, as the nested ifelse() version did.
      is.na(price) ~ NA_character_,
      price >= price_breaks[3] ~ "best",
      price >= price_breaks[2] ~ "good",
      price >= price_breaks[1] ~ "normal",
      TRUE ~ "bad"
    )
  )

# Number of diamonds in each tier.
table(diamonds$price_class)
##
## bad best good normal
## 13483 13485 13496 13476
# Mean price for each cut, listed from the most to the least expensive.
diamonds %>%
  group_by(cut) %>%
  summarise(AvgPrice = mean(price)) %>%
  arrange(desc(AvgPrice))
## # A tibble: 5 × 2
## cut AvgPrice
## <ord> <dbl>
## 1 Premium 4584.
## 2 Fair 4359.
## 3 Very Good 3982.
## 4 Good 3929.
## 5 Ideal 3458.