rm(list=ls())
getwd()
## [1] "C:/Users/LEEDAEJOON/Documents"
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2)
library(readxl)
data("diamonds")
diamonds1<-diamonds %>% rename(c=clarity, p=price)
head(diamonds1, 3)
## # A tibble: 3 x 10
## carat cut color c depth table p x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
count(diamonds, cut)
## # A tibble: 5 x 2
## cut n
## <ord> <int>
## 1 Fair 1610
## 2 Good 4906
## 3 Very Good 12082
## 4 Premium 13791
## 5 Ideal 21551
table(diamonds$cut)
##
## Fair Good Very Good Premium Ideal
## 1610 4906 12082 13791 21551
class(count(diamonds, cut))
## [1] "tbl_df" "tbl" "data.frame"
class(table(diamonds$cut))
## [1] "table"
df1<-diamonds %>% select(carat, price)
head(df1, 3)
## # A tibble: 3 x 2
## carat price
## <dbl> <int>
## 1 0.23 326
## 2 0.21 326
## 3 0.23 327
df2<-diamonds %>%
select(-carat, -price)
head(df2, 3)
## # A tibble: 3 x 8
## cut color clarity depth table x y z
## <ord> <ord> <ord> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ideal E SI2 61.5 55 3.95 3.98 2.43
## 2 Premium E SI1 59.8 61 3.89 3.84 2.31
## 3 Good E VS1 56.9 65 4.05 4.07 2.31
head(diamonds)
## # A tibble: 6 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
diamonds %>%
slice(1:5)
## # A tibble: 5 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
diamonds %>%
slice(-1:-100)
## # A tibble: 53,840 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.75 Very Good D SI1 63.2 56 2760 5.8 5.75 3.65
## 2 0.75 Premium E SI1 59.9 54 2760 6 5.96 3.58
## 3 0.74 Ideal G SI1 61.6 55 2760 5.8 5.85 3.59
## 4 0.75 Premium G VS2 61.7 58 2760 5.85 5.79 3.59
## 5 0.8 Ideal I VS1 62.9 56 2760 5.94 5.87 3.72
## 6 0.75 Ideal G SI1 62.2 55 2760 5.87 5.8 3.63
## 7 0.8 Premium G SI1 63 59 2760 5.9 5.81 3.69
## 8 0.74 Ideal I VVS2 62.3 55 2761 5.77 5.81 3.61
## 9 0.81 Ideal F SI2 58.8 57 2761 6.14 6.11 3.6
## 10 0.59 Ideal E VVS2 62 55 2761 5.38 5.43 3.35
## # ... with 53,830 more rows
diamonds %>%
filter(cut=="Good") %>% head(3)
## # A tibble: 3 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 2 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 3 0.3 Good J SI1 64 55 339 4.25 4.28 2.73
max(diamonds$price)
## [1] 18823
diamonds %>%
filter(price==max(price))
## # A tibble: 1 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 2.29 Premium I VS2 60.8 60 18823 8.5 8.47 5.16
diamonds %>%
filter(price==18823)
## # A tibble: 1 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 2.29 Premium I VS2 60.8 60 18823 8.5 8.47 5.16
diamonds %>%
filter(cut!="Preminum")%>% head(3)
## # A tibble: 3 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
diamonds %>%
filter(price!=1000) %>% head(3)
## # A tibble: 3 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
diamonds %>%
filter(price>=1000) %>% head(3)
## # A tibble: 3 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.7 Ideal E SI1 62.5 57 2757 5.7 5.72 3.57
## 2 0.86 Fair E SI2 55.1 69 2757 6.45 6.33 3.52
## 3 0.7 Ideal G VS2 61.6 56 2757 5.7 5.67 3.5
diamonds %>% filter(price!=1000 & cut=="Ideal") %>% head(3)
## # A tibble: 3 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.23 Ideal J VS1 62.8 56 340 3.93 3.9 2.46
## 3 0.31 Ideal J SI2 62.2 54 344 4.35 4.37 2.71
diamonds %>% filter(price!=1000 & cut=="Ideal" & color=="E") %>% head(3)
## # A tibble: 3 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.26 Ideal E VVS2 62.9 58 554 4.02 4.06 2.54
## 3 0.7 Ideal E SI1 62.5 57 2757 5.7 5.72 3.57
diamonds %>% filter(carat < 1 | carat > 5) %>% head(3)
## # A tibble: 3 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
diamonds %>% filter(cut%in%c("Ideal", "Good")) %>% head(3)
## # A tibble: 3 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 3 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
diamonds %>% select(carat, depth, price) %>%
filter(depth==max(depth)|price==min(price))
## # A tibble: 4 x 3
## carat depth price
## <dbl> <dbl> <int>
## 1 0.23 61.5 326
## 2 0.21 59.8 326
## 3 0.5 79 2579
## 4 0.5 79 2579
diamonds %>% mutate(Ratio=price/carat, Double=Ratio*2) %>% head(3)
## # A tibble: 3 x 12
## carat cut color clarity depth table price x y z Ratio Double
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 1417. 2835.
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 1552. 3105.
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 1422. 2843.
diamonds %>% summarize(mean(price))
## # A tibble: 1 x 1
## `mean(price)`
## <dbl>
## 1 3933.
diamonds %>% summarize(AvgPrice=mean(price),
MedianPrice=median(price),
AvgCarat=mean(carat))
## # A tibble: 1 x 3
## AvgPrice MedianPrice AvgCarat
## <dbl> <dbl> <dbl>
## 1 3933. 2401 0.798
diamonds %>% group_by(cut) %>%
summarize(AvgPrice=mean(price),
SumCarat=sum(carat))
## # A tibble: 5 x 3
## cut AvgPrice SumCarat
## <ord> <dbl> <dbl>
## 1 Fair 4359. 1684.
## 2 Good 3929. 4166.
## 3 Very Good 3982. 9743.
## 4 Premium 4584. 12301.
## 5 Ideal 3458. 15147.
diamonds %>% group_by(cut) %>%
summarize(n=n()) %>%
mutate(total=sum(n),
pct=n/total*100)
## # A tibble: 5 x 4
## cut n total pct
## <ord> <int> <int> <dbl>
## 1 Fair 1610 53940 2.98
## 2 Good 4906 53940 9.10
## 3 Very Good 12082 53940 22.4
## 4 Premium 13791 53940 25.6
## 5 Ideal 21551 53940 40.0
quantile(diamonds$price)
## 0% 25% 50% 75% 100%
## 326.00 950.00 2401.00 5324.25 18823.00
diamonds1<-diamonds %>%
mutate(price_class=
ifelse(price>=5324.25, "best",
ifelse(price>=2401, "good",
ifelse(price>=950, "normal", "bad"))))
table(diamonds1$price_class)
##
## bad best good normal
## 13483 13485 13496 13476
diamonds %>% group_by(cut) %>%
summarize(AvgPrice=mean(price)) %>%
arrange(desc(AvgPrice))
## # A tibble: 5 x 2
## cut AvgPrice
## <ord> <dbl>
## 1 Premium 4584.
## 2 Fair 4359.
## 3 Very Good 3982.
## 4 Good 3929.
## 5 Ideal 3458.
ott1<-data.frame(id=c(1,2,3),
car=c("bmw", "bmw", "bmw"),
fe=c(20, 22, 24))
ott1
## id car fe
## 1 1 bmw 20
## 2 2 bmw 22
## 3 3 bmw 24
ott2<-data.frame(id=c(1,4,5), fe1=c(30,34,35))
ott2
## id fe1
## 1 1 30
## 2 4 34
## 3 5 35
left_join(ott1,ott2,by="id")
## id car fe fe1
## 1 1 bmw 20 30
## 2 2 bmw 22 NA
## 3 3 bmw 24 NA