#Dplyr Workflow
#install.packages("dplyr")
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#vignette("introduction",package="dplyr")
# Show what is currently in the workspace before starting.
ls()
## character(0)
# NOTE(review): the original ran rm(list = ls()) here. Clearing the caller's
# global workspace from inside a script silently destroys unrelated work, and
# nothing later in this script relies on it; restart R for a clean session.
# Trigger a garbage collection and report memory use.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 399139 10.7 750400 20.1 592000 15.9
## Vcells 398598 3.1 1023718 7.9 677529 5.2
# memory.size() and memory.limit() are Windows-specific and are unsupported
# stubs from R 4.2.0 onwards, so only call them where they are meaningful.
if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
  print(memory.size())
  ## [1] 20.92
  print(memory.limit())
  ## [1] 1535
}
# The data can also be downloaded from http://bit.ly/dsdata
getwd()
## [1] "C:/Users/dell/Desktop/Teaching"
# `pattern` is a regular expression, not a glob: escape the dot and anchor the
# match at the end of the name so only files ending in ".csv" are listed (the
# unanchored ".csv" also returned "BigDiamonds.csv.zip").
dir("C:/Users/dell/Desktop/", pattern = "\\.csv$")
## [1] "Analytics decisionstats.com Audience Overview 20110617-20120717.csv"
## [2] "BigDiamonds.csv"
## [3] "Boston.csv"
## [4] "ccFraud.csv"
## [5] "test.csv"
library(data.table)
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:dplyr':
##
## between, last
# Record the R version and the attached package versions for reproducibility.
print(sessionInfo())
## R version 3.2.2 (2015-08-14)
## Platform: i386-w64-mingw32/i386 (32-bit)
## Running under: Windows 7 (build 7601) Service Pack 1
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] data.table_1.9.4 dplyr_0.4.2
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.1 digest_0.6.8 assertthat_0.1 chron_2.3-47
## [5] R6_2.1.1 plyr_1.8.3 DBI_0.3.1 magrittr_1.5
## [9] evaluate_0.7 stringi_0.5-5 reshape2_1.4.1 rmarkdown_0.7
## [13] tools_3.2.2 stringr_1.0.0 parallel_3.2.2 htmltools_0.2.6
## [17] knitr_1.10.5
# Read the full diamonds file into a data.table with fread().
# The progress messages fread() printed while reading are kept below as
# commented output; left uncommented they are not valid R and stop the script
# from parsing.
diamonds <- fread("C:/Users/dell/Desktop/BigDiamonds.csv")
##
## Read 3.3% of 598024 rows
## Read 8.4% of 598024 rows
## Read 15.0% of 598024 rows
## Read 20.1% of 598024 rows
## Read 26.8% of 598024 rows
## Read 31.8% of 598024 rows
## Read 36.8% of 598024 rows
## Read 40.1% of 598024 rows
## Read 46.8% of 598024 rows
## Read 53.5% of 598024 rows
## Read 58.5% of 598024 rows
## Read 61.9% of 598024 rows
## Read 65.2% of 598024 rows
## Read 70.2% of 598024 rows
## Read 75.2% of 598024 rows
## Read 80.3% of 598024 rows
## Read 87.0% of 598024 rows
## Read 92.0% of 598024 rows
## Read 97.0% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:27
# Preview the first six rows of the loaded table.
head(diamonds, n = 6L)
## V1 carat cut color clarity table depth cert measurements price
## 1: 1 0.25 V.Good K I1 59 63.7 GIA 3.96 x 3.95 x 2.52 NA
## 2: 2 0.23 Good G I1 61 58.1 GIA 4.00 x 4.05 x 2.30 NA
## 3: 3 0.34 Good J I2 58 58.7 GIA 4.56 x 4.53 x 2.67 NA
## 4: 4 0.21 V.Good D I1 60 60.6 GIA 3.80 x 3.82 x 2.31 NA
## 5: 5 0.31 V.Good K I1 59 62.2 EGL 4.35 x 4.26 x 2.68 NA
## 6: 6 0.20 Good G SI2 60 64.4 GIA 3.74 x 3.67 x 2.38 NA
## x y z
## 1: 3.96 3.95 2.52
## 2: 4.00 4.05 2.30
## 3: 4.56 4.53 2.67
## 4: 3.80 3.82 2.31
## 5: 4.35 4.26 2.68
## 6: 3.74 3.67 2.38
# List the column names.
colnames(diamonds)
## [1] "V1" "carat" "cut" "color"
## [5] "clarity" "table" "depth" "cert"
## [9] "measurements" "price" "x" "y"
## [13] "z"
# SELECT picks columns.
# Keep only the named columns, in the order they are listed.
diamonds2 <- diamonds %>% select(carat, price, color, cut)
print(diamonds2)
## carat price color cut
## 1: 0.25 NA K V.Good
## 2: 0.23 NA G Good
## 3: 0.34 NA J Good
## 4: 0.21 NA D V.Good
## 5: 0.31 NA K V.Good
## ---
## 598020: 3.02 99930 E Ideal
## 598021: 5.01 99942 I V.Good
## 598022: 3.43 99960 F Ideal
## 598023: 3.01 99966 E V.Good
## 598024: 4.13 99990 H Ideal
# A colon selects every column from the first name through the second.
diamonds2 <- diamonds %>% select(carat:price)
print(diamonds2)
## carat cut color clarity table depth cert
## 1: 0.25 V.Good K I1 59.0 63.7 GIA
## 2: 0.23 Good G I1 61.0 58.1 GIA
## 3: 0.34 Good J I2 58.0 58.7 GIA
## 4: 0.21 V.Good D I1 60.0 60.6 GIA
## 5: 0.31 V.Good K I1 59.0 62.2 EGL
## ---
## 598020: 3.02 Ideal E VVS2 58.0 59.8 HRD
## 598021: 5.01 V.Good I VVS2 63.5 61.5 IGI
## 598022: 3.43 Ideal F VS2 54.0 62.7 GIA
## 598023: 3.01 V.Good E VS1 58.0 62.9 GIA
## 598024: 4.13 Ideal H IF 56.0 62.5 IGI
## measurements price
## 1: 3.96 x 3.95 x 2.52 NA
## 2: 4.00 x 4.05 x 2.30 NA
## 3: 4.56 x 4.53 x 2.67 NA
## 4: 3.80 x 3.82 x 2.31 NA
## 5: 4.35 x 4.26 x 2.68 NA
## ---
## 598020: 9.43 x 9.51 x 5.66 99930
## 598021: 10.78 x 10.89 x 6.68 99942
## 598022: 9.66 x 9.61 x 6.05 99960
## 598023: 9.15 x 9.19 x 5.77 99966
## 598024: 10.27 x 10.19 x 6.4 99990
colnames(diamonds2)
## [1] "carat" "cut" "color" "clarity"
## [5] "table" "depth" "cert" "measurements"
## [9] "price"
# A leading minus drops a column and keeps all the others.
diamonds2 <- diamonds %>% select(-table)
colnames(diamonds2)
## [1] "V1" "carat" "cut" "color"
## [5] "clarity" "depth" "cert" "measurements"
## [9] "price" "x" "y" "z"
# Several columns can be dropped in the same call.
diamonds2 <- diamonds %>% select(-table, -V1)
colnames(diamonds2)
## [1] "carat" "cut" "color" "clarity"
## [5] "depth" "cert" "measurements" "price"
## [9] "x" "y" "z"
# FILTER keeps the rows that satisfy a condition.
diamonds3 <- diamonds2 %>% filter(price > 5000)
# Compare the row counts before and after filtering.
dim(diamonds2)
## [1] 598024 11
dim(diamonds3)
## [1] 247060 11
# The unfiltered prices include NAs; the filtered result has none.
summary(diamonds2[["price"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 300 1220 3503 8753 11170 99990 713
summary(diamonds3[["price"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5001 7930 14120 18490 21760 99990
# Select rows by position using data.table's bracket syntax (rows 100 to 900).
diamonds[seq(100, 900), ]
## V1 carat cut color clarity table depth cert measurements
## 1: 100 0.24 V.Good I SI1 57 63.0 IGI 3.96 x 4.00 x 2.50
## 2: 101 0.25 Ideal D I1 59 60.0 GIA 4.08 x 4.10 x 2.45
## 3: 102 0.24 Good F VVS2 64 63.6 GIA 3.89 x 3.82 x 2.45
## 4: 103 0.23 Ideal J SI1 59 62.4 EGL 2.46 x 3.9 x 3.98
## 5: 104 0.21 Good I SI2 0 0.0 OTHER 0.00-0.00 x 0.00
## ---
## 797: 896 0.23 Ideal G VS2 55 62.7 GIA 3.94 x 3.96 x 2.48
## 798: 897 0.21 V.Good I IF 61 59.8 GIA 3.87 x 3.90 x 2.32
## 799: 898 0.25 Ideal F SI1 67 61.5 GIA 4.07 x 4.09 x 2.51
## 800: 899 0.24 Good G VS2 66 59.5 GIA 4.01 x 4.04 x 2.40
## 801: 900 0.30 Good G I1 62 56.5 GIA 4.45 x 4.47 x 2.52
## price x y z
## 1: NA 3.96 4.00 2.50
## 2: NA 4.08 4.10 2.45
## 3: NA 3.89 3.82 2.45
## 4: NA 2.46 3.90 3.98
## 5: NA NA NA NA
## ---
## 797: 334 3.94 3.96 2.48
## 798: 334 3.87 3.90 2.32
## 799: 334 4.07 4.09 2.51
## 800: 334 4.01 4.04 2.40
## 801: 334 4.45 4.47 2.52
# dplyr's slice() also picks rows by position (rows 100 to 900 here).
diamonds %>% slice(100:900)
## V1 carat cut color clarity table depth cert measurements
## 1: 100 0.24 V.Good I SI1 57 63.0 IGI 3.96 x 4.00 x 2.50
## 2: 101 0.25 Ideal D I1 59 60.0 GIA 4.08 x 4.10 x 2.45
## 3: 102 0.24 Good F VVS2 64 63.6 GIA 3.89 x 3.82 x 2.45
## 4: 103 0.23 Ideal J SI1 59 62.4 EGL 2.46 x 3.9 x 3.98
## 5: 104 0.21 Good I SI2 0 0.0 OTHER 0.00-0.00 x 0.00
## ---
## 797: 896 0.23 Ideal G VS2 55 62.7 GIA 3.94 x 3.96 x 2.48
## 798: 897 0.21 V.Good I IF 61 59.8 GIA 3.87 x 3.90 x 2.32
## 799: 898 0.25 Ideal F SI1 67 61.5 GIA 4.07 x 4.09 x 2.51
## 800: 899 0.24 Good G VS2 66 59.5 GIA 4.01 x 4.04 x 2.40
## 801: 900 0.30 Good G I1 62 56.5 GIA 4.45 x 4.47 x 2.52
## price x y z
## 1: NA 3.96 4.00 2.50
## 2: NA 4.08 4.10 2.45
## 3: NA 3.89 3.82 2.45
## 4: NA 2.46 3.90 3.98
## 5: NA NA NA NA
## ---
## 797: 334 3.94 3.96 2.48
## 798: 334 3.87 3.90 2.32
## 799: 334 4.07 4.09 2.51
## 800: 334 4.01 4.04 2.40
## 801: 334 4.45 4.47 2.52
# ARRANGE sorts rows; the default order is ascending.
diamonds3 %>% arrange(price)
## carat cut color clarity depth cert measurements price
## 1: 1.03 Ideal J VS2 62.0 GIA 6.43 x 6.48 x 4.00 5001
## 2: 1.01 V.Good H SI2 63.0 GIA 6.32 x 6.35 x 3.99 5001
## 3: 1.04 Ideal G SI2 59.8 GIA 6.57 x 6.59 x 3.94 5001
## 4: 0.90 Ideal G VS2 62.8 GIA 6.17 x 6.14 x 3.87 5001
## 5: 1.05 Ideal I SI1 60.3 GIA 6.60 x 6.57 x 3.97 5001
## ---
## 247056: 3.43 Ideal F VS2 62.7 GIA 9.66 x 9.61 x 6.05 99960
## 247057: 3.01 V.Good E VS1 62.9 GIA 9.15 x 9.19 x 5.77 99966
## 247058: 3.01 V.Good E VS1 62.9 GIA 9.15 x 9.19 x 5.77 99966
## 247059: 4.13 Ideal H IF 62.5 IGI 10.27 x 10.19 x 6.4 99990
## 247060: 4.13 Ideal H IF 62.5 IGI 10.27 x 10.19 x 6.4 99990
## x y z
## 1: 6.43 6.48 4.00
## 2: 6.32 6.35 3.99
## 3: 6.57 6.59 3.94
## 4: 6.17 6.14 3.87
## 5: 6.60 6.57 3.97
## ---
## 247056: 9.66 9.61 6.05
## 247057: 9.15 9.19 5.77
## 247058: 9.15 9.19 5.77
## 247059: 10.27 10.19 6.40
## 247060: 10.27 10.19 6.40
# Wrap the column in desc() to sort from highest to lowest price.
diamonds3 %>% arrange(desc(price))
## carat cut color clarity depth cert measurements price
## 1: 4.13 Ideal H IF 62.5 IGI 10.27 x 10.19 x 6.4 99990
## 2: 4.13 Ideal H IF 62.5 IGI 10.27 x 10.19 x 6.4 99990
## 3: 3.01 V.Good E VS1 62.9 GIA 9.15 x 9.19 x 5.77 99966
## 4: 3.01 V.Good E VS1 62.9 GIA 9.15 x 9.19 x 5.77 99966
## 5: 3.43 Ideal F VS2 62.7 GIA 9.66 x 9.61 x 6.05 99960
## ---
## 247056: 0.90 Ideal G VS2 62.8 GIA 6.17 x 6.14 x 3.87 5001
## 247057: 1.05 Ideal I SI1 60.3 GIA 6.60 x 6.57 x 3.97 5001
## 247058: 1.09 V.Good D VS2 61.8 EGL 6.66 x 6.55 x 4.08 5001
## 247059: 1.27 Ideal G SI2 62.1 EGL 6.96 x 6.91 x 4.31 5001
## 247060: 1.40 Good G SI2 63.7 EGL 6.95 x 6.89 x 4.44 5001
## x y z
## 1: 10.27 10.19 6.40
## 2: 10.27 10.19 6.40
## 3: 9.15 9.19 5.77
## 4: 9.15 9.19 5.77
## 5: 9.66 9.61 6.05
## ---
## 247056: 6.17 6.14 3.87
## 247057: 6.60 6.57 3.97
## 247058: 6.66 6.55 4.08
## 247059: 6.96 6.91 4.31
## 247060: 6.95 6.89 4.44
# Sort from the heaviest stone to the lightest.
diamonds3 %>% arrange(desc(carat))
## carat cut color clarity depth cert measurements
## 1: 9.25 Ideal L SI2 62.0 EGL 13.24 x 13.33 x 8.28
## 2: 9.25 Ideal L SI2 62.0 EGL USA 13.24 x 13.33 x 8.28
## 3: 9.25 Ideal L SI2 62.0 EGL USA 13.24 x 13.33 x 8.28
## 4: 9.25 Ideal L SI2 62.0 EGL 13.24 x 13.33 x 8.28
## 5: 9.25 Ideal L SI2 62.0 EGL USA 13.24 x 13.33 x 8.28
## ---
## 247056: 0.54 Ideal D IF 61.9 GIA 5.23 x 5.26 x 3.24
## 247057: 0.53 Ideal D IF 61.7 GIA 5.20 x 5.23 x 3.22
## 247058: 0.51 Ideal D IF 62.0 IGI 3.16 x 5.07 x 5.12
## 247059: 0.47 V.Good G VS2 62.6 EGL 3.1 x 4.92 x 4.99
## 247060: 0.46 Ideal H VS2 61.8 AGS 4.95 x 5.00 x 3.07
## price x y z
## 1: 81386 13.24 13.33 8.28
## 2: 82890 13.24 13.33 8.28
## 3: 84849 13.24 13.33 8.28
## 4: 81386 13.24 13.33 8.28
## 5: 82890 13.24 13.33 8.28
## ---
## 247056: 5251 5.23 5.26 3.24
## 247057: 5686 5.20 5.23 3.22
## 247058: 5199 3.16 5.07 5.12
## 247059: 5325 3.10 4.92 4.99
## 247060: 12957 4.95 5.00 3.07
# MUTATE adds new columns computed from existing ones.
# The result is only printed here; diamonds3 itself is not changed.
diamonds3 %>% mutate(rate = price / carat)
## carat cut color clarity depth cert measurements
## 1: 1.03 Ideal J VS2 62.0 GIA 6.43 x 6.48 x 4.00
## 2: 1.01 V.Good H SI2 63.0 GIA 6.32 x 6.35 x 3.99
## 3: 1.04 Ideal G SI2 59.8 GIA 6.57 x 6.59 x 3.94
## 4: 0.90 Ideal G VS2 62.8 GIA 6.17 x 6.14 x 3.87
## 5: 1.05 Ideal I SI1 60.3 GIA 6.60 x 6.57 x 3.97
## ---
## 247056: 3.02 Ideal E VVS2 59.8 HRD 9.43 x 9.51 x 5.66
## 247057: 5.01 V.Good I VVS2 61.5 IGI 10.78 x 10.89 x 6.68
## 247058: 3.43 Ideal F VS2 62.7 GIA 9.66 x 9.61 x 6.05
## 247059: 3.01 V.Good E VS1 62.9 GIA 9.15 x 9.19 x 5.77
## 247060: 4.13 Ideal H IF 62.5 IGI 10.27 x 10.19 x 6.4
## price x y z rate
## 1: 5001 6.43 6.48 4.00 4855.340
## 2: 5001 6.32 6.35 3.99 4951.485
## 3: 5001 6.57 6.59 3.94 4808.654
## 4: 5001 6.17 6.14 3.87 5556.667
## 5: 5001 6.60 6.57 3.97 4762.857
## ---
## 247056: 99930 9.43 9.51 5.66 33089.404
## 247057: 99942 10.78 10.89 6.68 19948.503
## 247058: 99960 9.66 9.61 6.05 29142.857
## 247059: 99966 9.15 9.19 5.77 33211.296
## 247060: 99990 10.27 10.19 6.40 24210.654
# Column names before the new column is stored.
colnames(diamonds3)
## [1] "carat" "cut" "color" "clarity"
## [5] "depth" "cert" "measurements" "price"
## [9] "x" "y" "z"
# Assign the result back so the price-per-carat column persists.
diamonds3 <- diamonds3 %>% mutate(rate = price / carat)
colnames(diamonds3)
## [1] "carat" "cut" "color" "clarity"
## [5] "depth" "cert" "measurements" "price"
## [9] "x" "y" "z" "rate"
summary(diamonds3[["rate"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1263 6312 8392 9673 11490 49520
# TRANSMUTE computes new columns and returns only those columns.
diamonds3 %>% transmute(rate2 = price * 100 / carat)
## rate2
## 1: 485534.0
## 2: 495148.5
## 3: 480865.4
## 4: 555666.7
## 5: 476285.7
## ---
## 247056: 3308940.4
## 247057: 1994850.3
## 247058: 2914285.7
## 247059: 3321129.6
## 247060: 2421065.4
# Store the one-column result and print it.
diamonds4 <- diamonds3 %>% transmute(rate2 = price * 100 / carat)
print(diamonds4)
## rate2
## 1: 485534.0
## 2: 495148.5
## 3: 480865.4
## 4: 555666.7
## 5: 476285.7
## ---
## 247056: 3308940.4
## 247057: 1994850.3
## 247058: 2914285.7
## 247059: 3321129.6
## 247060: 2421065.4
# SUMMARISE collapses the table to one row: the overall mean price.
diamonds3 %>% summarise(mean(price))
## mean(price)
## 1: 18494.24
# Group by cut first to get one mean per cut.
gr1 <- diamonds3 %>% group_by(cut)
gr1 %>% summarise(mean(price))
## Warning in gmean(price): Group 1 summed to more than type 'integer'
## can hold so the result has been coerced to 'numeric' automatically, for
## convenience.
## Source: local data table [3 x 2]
##
## cut mean(price)
## 1 Ideal 18999.73
## 2 V.Good 17644.57
## 3 Good 15819.84
# Several named summaries per group: the mean price and the row count.
gr1 %>%
  summarise(
    avgprice = mean(price),
    newc = n()
  )
## Warning in gmean(price): Group 1 summed to more than type 'integer'
## can hold so the result has been coerced to 'numeric' automatically, for
## convenience.
## Source: local data table [3 x 3]
##
## cut avgprice newc
## 1 Ideal 18999.73 174143
## 2 V.Good 17644.57 58629
## 3 Good 15819.84 14288
# Draw 600 random rows without replacement.
# Arguments are named and FALSE is spelled out: `F` is an ordinary variable
# that can be reassigned, whereas FALSE is a reserved word.
sample_n(diamonds, size = 600, replace = FALSE)
## V1 carat cut color clarity table depth cert measurements
## 1: 389711 1.43 Ideal E SI1 60 62.9 EGL 7.10 x 7.15 x 4.49
## 2: 433085 1.22 Ideal H VVS1 59 62.0 GIA 6.84 x 6.81 x 4.23
## 3: 526549 1.12 Ideal F VVS1 58 60.9 HRD 4.1 x 6.72 x 6.74
## 4: 525913 1.25 Ideal G IF 59 60.9 GIA 6.97 x 6.93 x 4.23
## 5: 287541 0.71 V.Good H VVS1 59 62.9 GIA 5.62 x 5.66 x 3.55
## ---
## 596: 279708 0.70 V.Good E VS1 52 63.8 GIA 5.57 x 5.63 x 3.57
## 597: 321363 0.90 Good J VVS1 58 63.2 GIA 6.08 x 6.05 x 3.83
## 598: 4800 0.26 Ideal F SI1 54 62.0 GIA 4.07 x 4.10 x 2.53
## 599: 195028 0.45 Ideal F VVS1 57 62.8 GIA 4.88 x 4.86 x 3.06
## 600: 553343 1.85 Ideal H VS2 58 62.2 GIA 7.83 x 7.85 x 4.87
## price x y z
## 1: 6604 7.10 7.15 4.49
## 2: 9530 6.84 6.81 4.23
## 3: 13645 4.10 6.72 6.74
## 4: 13557 6.97 6.93 4.23
## 5: 3229 5.62 5.66 3.55
## ---
## 596: 3057 5.57 5.63 3.57
## 597: 4088 6.08 6.05 3.83
## 598: 433 4.07 4.10 2.53
## 599: 1690 4.88 4.86 3.06
## 600: 18480 7.83 7.85 4.87
# Draw a random 1% of the rows.
diamonds %>% sample_frac(size = 0.01)
## V1 carat cut color clarity table depth cert
## 1: 515882 3.01 Ideal I VS2 58.0 60.0 GIA
## 2: 409053 1.50 Good G VS1 57.0 64.9 EGL ISRAEL
## 3: 561264 2.10 Ideal G SI1 58.0 62.0 GIA
## 4: 327598 0.90 Ideal E SI2 61.0 60.7 EGL
## 5: 366527 1.00 V.Good G SI1 58.0 63.6 GIA
## ---
## 5976: 173269 0.71 V.Good J SI1 57.0 64.2 GIA
## 5977: 219724 0.50 V.Good D VVS2 57.5 64.7 IGI
## 5978: 127090 0.58 V.Good J SI2 58.0 61.8 IGI
## 5979: 186970 0.54 Ideal F SI1 55.0 61.8 GIA
## 5980: 182459 0.50 V.Good F VS2 55.0 63.3 GIA
## measurements price x y z
## 1: 5.63 x 9.34 x 9.41 44816 5.63 9.34 9.41
## 2: 7.05 x 7.09 x 4.59 7690 7.05 7.09 4.59
## 3: 8.12 x 8.19 x 5.06 20819 8.12 8.19 5.06
## 4: 3.78 x 6.2 x 6.26 4266 3.78 6.20 6.26
## 5: 6.27 x 6.31 x 4.00 5575 6.27 6.31 4.00
## ---
## 5976: 5.58 x 5.55 x 3.58 1440 5.58 5.55 3.58
## 5977: 4.98 x 5.03 x 3.24 1990 4.98 5.03 3.24
## 5978: 5.26 x 5.38 x 3.28 1041 5.26 5.38 3.28
## 5979: 5.22 x 5.26 x 3.24 1597 5.22 5.26 3.24
## 5980: 5.03 x 5.05 x 3.19 1543 5.03 5.05 3.19
#vignette("introduction",package="dplyr")
# Exercises in the style of the dplyr vignette: grouped summaries built with
# group_by() and summarise().
# Mean price per color, ignoring missing prices.
# TRUE is spelled out because `T` is a reassignable variable; the unnamed
# summary column is labelled with the expression text, so its header changes
# to match.
dcolor <- group_by(diamonds, color)
summarise(dcolor, mean(price, na.rm = TRUE))
## Source: local data table [9 x 2]
##
## color mean(price, na.rm = TRUE)
## 1 K 9694.257
## 2 G 8984.200
## 3 J 9423.581
## 4 D 8266.346
## 5 F 8234.730
## 6 E 7282.990
## 7 H 9941.795
## 8 I 9541.319
## 9 L 7109.228
# Mean price per color for stones heavier than 4 carats.
# TRUE replaces the reassignable shorthand `T`; the unnamed summary column is
# labelled with the expression text, so its header changes to match.
diamondstest <- filter(diamonds, carat > 4)
dcolor <- group_by(diamondstest, color)
summarise(dcolor, mean(price, na.rm = TRUE))
## Source: local data table [9 x 2]
##
## color mean(price, na.rm = TRUE)
## 1 D 55053.01
## 2 E 55229.97
## 3 G 58614.04
## 4 K 55531.86
## 5 J 60725.33
## 6 I 65464.33
## 7 H 62630.72
## 8 F 59957.27
## 9 L 49344.77
# Mean price of color "J" stones under 3 carats.
# TRUE replaces the reassignable shorthand `T`; the unnamed summary column is
# labelled with the expression text, so its header changes to match.
diamondstest2 <- filter(diamonds, carat < 3 & color == "J")
dcolor2 <- group_by(diamondstest2, color)
summarise(dcolor2, mean(price, na.rm = TRUE))
## Source: local data table [1 x 2]
##
## color mean(price, na.rm = TRUE)
## 1 J 5767.972
# Mean price of "Ideal"-cut, color "K" stones under 4.5 carats.
# TRUE replaces the reassignable shorthand `T`; the unnamed summary column is
# labelled with the expression text, so its header changes to match.
diamondstest3 <- filter(diamonds, carat < 4.5 & color == "K" & cut == "Ideal")
dcolor3 <- group_by(diamondstest3, color)
summarise(dcolor3, mean(price, na.rm = TRUE))
## Source: local data table [1 x 2]
##
## color mean(price, na.rm = TRUE)
## 1 K 10046.97
# Mean price of "Ideal"-cut, color "J" stones under 3 carats, grouped by cut.
# TRUE replaces the reassignable shorthand `T`; the unnamed summary column is
# labelled with the expression text, so its header changes to match.
diamondstest4 <- filter(diamonds, carat < 3 & color == "J" & cut == "Ideal")
dcut2 <- group_by(diamondstest4, cut)
summarise(dcut2, mean(price, na.rm = TRUE))
## Source: local data table [1 x 2]
##
## cut mean(price, na.rm = TRUE)
## 1 Ideal 6639.536
# Mean price of "V.Good"-cut, color "I" stones priced above 3000.
# Note that diamondstest4 and dcut2 are reused here and overwritten.
# TRUE replaces the reassignable shorthand `T`; the unnamed summary column is
# labelled with the expression text, so its header changes to match.
diamondstest4 <- filter(
  diamonds,
  price > 3000 &
    color == "I" &
    cut == "V.Good"
)
dcut2 <- group_by(diamondstest4, cut)
summarise(dcut2, mean(price, na.rm = TRUE))
## Source: local data table [1 x 2]
##
## cut mean(price, na.rm = TRUE)
## 1 V.Good 13373.24
# The same kind of question in data.table syntax, DT[i, j, by]: the average
# price of diamonds priced above 3000 for every color/cut combination.
# The unnamed result column is called V1. TRUE replaces the reassignable `T`.
diamonds[price > 3000, mean(price, na.rm = TRUE), .(color, cut)]
## color cut V1
## 1: G V.Good 13912.689
## 2: F Ideal 16560.190
## 3: J Ideal 15106.234
## 4: G Ideal 16127.682
## 5: E Good 9562.663
## 6: I Good 10568.739
## 7: I V.Good 13373.244
## 8: E V.Good 12512.359
## 9: H V.Good 13546.145
## 10: G Good 11217.941
## 11: F V.Good 13153.974
## 12: E Ideal 16549.568
## 13: I Ideal 15601.083
## 14: H Ideal 16766.347
## 15: D Ideal 17991.784
## 16: D V.Good 13063.586
## 17: J Good 10946.462
## 18: K Ideal 15217.389
## 19: F Good 11114.473
## 20: H Good 11490.488
## 21: K V.Good 15361.029
## 22: J V.Good 14305.686
## 23: D Good 10355.956
## 24: L Ideal 12607.748
## 25: L V.Good 13453.077
## 26: K Good 12485.608
## 27: L Good 13843.234
## color cut V1
# data.table version of the single-group question: the mean price of
# "V.Good"-cut, color "I" diamonds priced above 3000, returned as a bare number.
# TRUE replaces the reassignable `T`, and the empty trailing `by` argument
# (a dangling comma before the closing bracket) is removed.
diamonds[
  price > 3000 & color == "I" & cut == "V.Good",
  mean(price, na.rm = TRUE)
]
## [1] 13373.24