# Optional package installs, left disabled:
# install.packages("abc")
# install.packages("zoo")
library(readr)

# Base-R import, used here only to demonstrate ls() and rm().
BigDiamonds <- read.csv("C:/Users/ajaohri/Desktop/mentoring/BigDiamonds.csv")
ls()
## [1] "BigDiamonds"
rm(BigDiamonds)

# Create a throwaway vector and remove exactly that object. A blanket
# rm(list = ls()) would also delete anything else the caller already had in
# the workspace, so the removal is targeted instead.
a <- c(1, 2, 3, 4)
rm(a)

# Re-import with readr, which is generally faster than the base importer and
# returns a tibble.
BigDiamonds2 <- read_csv("C:/Users/ajaohri/Desktop/mentoring/BigDiamonds.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_integer(),
## carat = col_double(),
## cut = col_character(),
## color = col_character(),
## clarity = col_character(),
## table = col_double(),
## depth = col_double(),
## cert = col_character(),
## measurements = col_character(),
## price = col_integer(),
## x = col_double(),
## y = col_double(),
## z = col_double()
## )
# First six rows (6 is head()'s default row count).
head(BigDiamonds2, n = 6L)
## # A tibble: 6 x 13
## X1 carat cut color clarity table depth cert measurements price
## <int> <dbl> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <int>
## 1 1 0.25 V.Go~ K I1 59 63.7 GIA 3.96 x 3.95~ NA
## 2 2 0.23 Good G I1 61 58.1 GIA 4.00 x 4.05~ NA
## 3 3 0.34 Good J I2 58 58.7 GIA 4.56 x 4.53~ NA
## 4 4 0.21 V.Go~ D I1 60 60.6 GIA 3.80 x 3.82~ NA
## 5 5 0.31 V.Go~ K I1 59 62.2 EGL 4.35 x 4.26~ NA
## 6 6 0.2 Good G SI2 60 64.4 GIA 3.74 x 3.67~ NA
## # ... with 3 more variables: x <dbl>, y <dbl>, z <dbl>
# Last six rows.
tail(BigDiamonds2, n = 6L)
## # A tibble: 6 x 13
## X1 carat cut color clarity table depth cert measurements price
## <int> <dbl> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <int>
## 1 598019 3.01 Ideal D VS2 58 62 GIA 9.25 x 9.2 ~ 99920
## 2 598020 3.02 Ideal E VVS2 58 59.8 HRD 9.43 x 9.51~ 99930
## 3 598021 5.01 V.Go~ I VVS2 63.5 61.5 IGI 10.78 x 1~ 99942
## 4 598022 3.43 Ideal F VS2 54 62.7 GIA 9.66 x 9.61~ 99960
## 5 598023 3.01 V.Go~ E VS1 58 62.9 GIA 9.15 x 9.19~ 99966
## 6 598024 4.13 Ideal H IF 56 62.5 IGI 10.27 x 10.~ 99990
## # ... with 3 more variables: x <dbl>, y <dbl>, z <dbl>
# First ten rows, by passing the row count explicitly.
head(BigDiamonds2, n = 10)
## # A tibble: 10 x 13
## X1 carat cut color clarity table depth cert measurements price
## <int> <dbl> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <int>
## 1 1 0.25 V.Go~ K I1 59 63.7 GIA 3.96 x 3.95~ NA
## 2 2 0.23 Good G I1 61 58.1 GIA 4.00 x 4.05~ NA
## 3 3 0.34 Good J I2 58 58.7 GIA 4.56 x 4.53~ NA
## 4 4 0.21 V.Go~ D I1 60 60.6 GIA 3.80 x 3.82~ NA
## 5 5 0.31 V.Go~ K I1 59 62.2 EGL 4.35 x 4.26~ NA
## 6 6 0.2 Good G SI2 60 64.4 GIA 3.74 x 3.67~ NA
## 7 7 0.2 Good G SI2 63 62.6 GIA 3.72 x 3.65~ NA
## 8 8 0.22 V.Go~ D I1 61 59.2 GIA 3.95 x 3.97~ NA
## 9 9 0.23 V.Go~ K SI2 57.5 63.6 IGI 3.87 x 3.90~ NA
## 10 10 0.2 Good F SI1 65 54.9 GIA 3.83 x 4.00~ NA
## # ... with 3 more variables: x <dbl>, y <dbl>, z <dbl>
# Compact overview of every column's type and first few values.
str(BigDiamonds2)
## Classes 'tbl_df', 'tbl' and 'data.frame': 598024 obs. of 13 variables:
## $ X1 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ carat : num 0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
## $ cut : chr "V.Good" "Good" "Good" "V.Good" ...
## $ color : chr "K" "G" "J" "D" ...
## $ clarity : chr "I1" "I1" "I2" "I1" ...
## $ table : num 59 61 58 60 59 60 63 61 57.5 65 ...
## $ depth : num 63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
## $ cert : chr "GIA" "GIA" "GIA" "GIA" ...
## $ measurements: chr "3.96 x 3.95 x 2.52" "4.00 x 4.05 x 2.30" "4.56 x 4.53 x 2.67" "3.80 x 3.82 x 2.31" ...
## $ price : int NA NA NA NA NA NA NA NA NA NA ...
## $ x : num 3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
## $ y : num 3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
## $ z : num 2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 13
## .. ..$ X1 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ carat : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ cut : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ color : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ clarity : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ table : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ depth : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ cert : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ measurements: list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ price : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ x : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ y : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ z : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
# Structure of a single column, pulled out as a plain vector.
str(BigDiamonds2[["depth"]])
## num [1:598024] 63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
# Row and column counts.
nrow(BigDiamonds2)
## [1] 598024
ncol(BigDiamonds2)
## [1] 13
# length() of a data frame counts its columns, so it agrees with ncol().
length(BigDiamonds2)
## [1] 13
# Per-column summary: quantiles for numeric columns, length/class/mode for
# character columns.
summary(BigDiamonds2)
## X1 carat cut color
## Min. : 1 Min. :0.200 Length:598024 Length:598024
## 1st Qu.:149507 1st Qu.:0.500 Class :character Class :character
## Median :299013 Median :0.900 Mode :character Mode :character
## Mean :299013 Mean :1.071
## 3rd Qu.:448518 3rd Qu.:1.500
## Max. :598024 Max. :9.250
##
## clarity table depth cert
## Length:598024 Min. : 0.00 Min. : 0.00 Length:598024
## Class :character 1st Qu.:56.00 1st Qu.:61.00 Class :character
## Mode :character Median :58.00 Median :62.10 Mode :character
## Mean :57.63 Mean :61.06
## 3rd Qu.:59.00 3rd Qu.:62.70
## Max. :75.90 Max. :81.30
##
## measurements price x y
## Length:598024 Min. : 300 Min. : 0.150 Min. : 1.000
## Class :character 1st Qu.: 1220 1st Qu.: 4.740 1st Qu.: 4.970
## Mode :character Median : 3503 Median : 5.780 Median : 6.050
## Mean : 8753 Mean : 5.991 Mean : 6.199
## 3rd Qu.:11174 3rd Qu.: 6.970 3rd Qu.: 7.230
## Max. :99990 Max. :13.890 Max. :13.890
## NA's :713 NA's :1815 NA's :1852
## z
## Min. : 0.040
## 1st Qu.: 3.120
## Median : 3.860
## Mean : 4.033
## 3rd Qu.: 4.610
## Max. :13.180
## NA's :2544
# Turn `cut` into a factor so summary() reports a count per level.
BigDiamonds2[["cut"]] <- as.factor(BigDiamonds2[["cut"]])
summary(BigDiamonds2[["cut"]])
## Good Ideal V.Good
## 59680 369448 168896
# Same treatment for `color`.
BigDiamonds2[["color"]] <- as.factor(BigDiamonds2[["color"]])
summary(BigDiamonds2[["color"]])
## D E F G H I J K L
## 73630 93483 93573 96204 86619 70282 48709 25868 9656
# Rows 1-5 of the third column (`cut`); a tibble stays a tibble when subset
# with `[`.
BigDiamonds2[seq_len(5), 3]
## # A tibble: 5 x 1
## cut
## <fct>
## 1 V.Good
## 2 Good
## 3 Good
## 4 V.Good
## 5 V.Good
# Flag the columns that are still character vectors. vapply() pins the result
# to exactly one logical per column; sapply() would silently return an empty
# list instead of a logical vector if the input had no columns.
ind <- vapply(BigDiamonds2, is.character, logical(1))
ind
## X1 carat cut color clarity
## FALSE FALSE FALSE FALSE TRUE
## table depth cert measurements price
## FALSE FALSE TRUE TRUE FALSE
## x y z
## FALSE FALSE FALSE
# Preview only the flagged (character) columns.
head(BigDiamonds2[ind])
## # A tibble: 6 x 3
## clarity cert measurements
## <chr> <chr> <chr>
## 1 I1 GIA 3.96 x 3.95 x 2.52
## 2 I1 GIA 4.00 x 4.05 x 2.30
## 3 I2 GIA 4.56 x 4.53 x 2.67
## 4 I1 GIA 3.80 x 3.82 x 2.31
## 5 I1 EGL 4.35 x 4.26 x 2.68
## 6 SI2 GIA 3.74 x 3.67 x 2.38
# Convert every remaining character column (selected by `ind`) to a factor in
# one pass. Note that this also turns the free-text `measurements` column into
# a factor with 241452 levels, as the later str() output records.
BigDiamonds2[ind] <- lapply(BigDiamonds2[ind], function(column) factor(column))
# Summarise the first eight columns; factor columns now show level counts.
summary(BigDiamonds2[, 1:8])
## X1 carat cut color
## Min. : 1 Min. :0.200 Good : 59680 G :96204
## 1st Qu.:149507 1st Qu.:0.500 Ideal :369448 F :93573
## Median :299013 Median :0.900 V.Good:168896 E :93483
## Mean :299013 Mean :1.071 H :86619
## 3rd Qu.:448518 3rd Qu.:1.500 D :73630
## Max. :598024 Max. :9.250 I :70282
## (Other):84233
## clarity table depth cert
## SI1 :116631 Min. : 0.00 Min. : 0.00 GIA :463555
## VS2 :111082 1st Qu.:56.00 1st Qu.:61.00 IGI : 43667
## SI2 :104300 Median :58.00 Median :62.10 EGL : 33814
## VS1 : 97730 Mean :57.63 Mean :61.06 EGL USA : 16079
## VVS2 : 65500 3rd Qu.:59.00 3rd Qu.:62.70 EGL Intl. : 11447
## VVS1 : 54798 Max. :75.90 Max. :81.30 EGL ISRAEL: 11301
## (Other): 47983 (Other) : 18161
# rep() repeats the whole sequence 1:3 five times.
rep(1:3, times = 5)
## [1] 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3
# A small numeric vector with repeated values ...
ajay <- c(1, 2, 3, 1, 1, 2)
str(ajay)
## num [1:6] 1 2 3 1 1 2
# ... becomes a factor whose levels are its distinct values.
ajay <- as.factor(ajay)
ajay
## [1] 1 2 3 1 1 2
## Levels: 1 2 3
# Re-inspect the structure now that the character columns have been converted
# to factors.
str(object = BigDiamonds2)
## Classes 'tbl_df', 'tbl' and 'data.frame': 598024 obs. of 13 variables:
## $ X1 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ carat : num 0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
## $ cut : Factor w/ 3 levels "Good","Ideal",..: 3 1 1 3 3 1 1 3 3 1 ...
## $ color : Factor w/ 9 levels "D","E","F","G",..: 8 4 7 1 8 4 4 1 8 3 ...
## $ clarity : Factor w/ 9 levels "I1","I2","IF",..: 1 1 2 1 1 5 5 1 5 4 ...
## $ table : num 59 61 58 60 59 60 63 61 57.5 65 ...
## $ depth : num 63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
## $ cert : Factor w/ 9 levels "AGS","EGL","EGL Intl.",..: 6 6 6 6 2 6 6 6 8 6 ...
## $ measurements: Factor w/ 241452 levels "0 x 0 x 3.19",..: 19589 21547 48106 15330 36974 14290 14029 19271 16744 15806 ...
## $ price : int NA NA NA NA NA NA NA NA NA NA ...
## $ x : num 3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
## $ y : num 3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
## $ z : num 2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 13
## .. ..$ X1 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ carat : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ cut : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ color : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ clarity : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ table : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ depth : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ cert : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ measurements: list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ price : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ x : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ y : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ z : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
# Distinct values of `cut`, in order of first appearance.
unique(BigDiamonds2[["cut"]])
## [1] V.Good Good Ideal
## Levels: Good Ideal V.Good
# One-way frequency table.
table(BigDiamonds2[["cut"]])
##
## Good Ideal V.Good
## 59680 369448 168896
# Two-way cross-tabulation: cut (rows) by color (columns).
table(BigDiamonds2[["cut"]], BigDiamonds2[["color"]])
##
## D E F G H I J K L
## Good 6604 9733 9141 8923 7600 7380 5357 3467 1475
## Ideal 45435 55547 58148 62067 56026 43000 29440 14729 5056
## V.Good 21591 28203 26284 25214 22993 19902 13912 7672 3125
# Extracting the column first and then taking five elements yields a factor.
head(BigDiamonds2$cut, 5)
## [1] V.Good Good Good V.Good V.Good
## Levels: Good Ideal V.Good
class(head(BigDiamonds2$cut, 5))
## [1] "factor"
# Subsetting the tibble with `[` by row and column keeps it a tibble.
BigDiamonds2[seq_len(5), 3]
## # A tibble: 5 x 1
## cut
## <fct>
## 1 V.Good
## 2 Good
## 3 Good
## 4 V.Good
## 5 V.Good
class(BigDiamonds2[seq_len(5), 3])
## [1] "tbl_df" "tbl" "data.frame"
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
# Base summary of price: quantiles, mean and the count of missing values.
summary(BigDiamonds2[["price"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 300 1220 3503 8753 11174 99990 713
# describe() (Hmisc is attached above) adds n, missing and distinct counts
# plus more quantiles. The argument expression is kept as written because it
# is echoed as the label in the output.
describe(BigDiamonds2$price)
## BigDiamonds2$price
## n missing distinct Info Mean Gmd .05 .10
## 597311 713 40312 1 8753 11059 612 736
## .25 .50 .75 .90 .95
## 1220 3503 11174 22209 33665
##
## lowest : 300 301 302 303 304, highest: 99930 99942 99960 99966 99990
# Mean price per colour. mean() is applied without na.rm, so the recorded
# result is NA for every colour; the missing prices are dealt with further
# down by dropping incomplete rows.
summarize(BigDiamonds2$price, BigDiamonds2$color, FUN = mean)
## BigDiamonds2$color BigDiamonds2$price
## 1 D NA
## 2 E NA
## 3 F NA
## 4 G NA
## 5 H NA
## 6 I NA
## 7 J NA
## 8 K NA
## 9 L NA
# Row count before removing incomplete rows.
nrow(BigDiamonds2)
## [1] 598024
# na.omit() drops every row that has an NA in ANY column.
# NOTE(review): this removes 4240 rows although only 713 have a missing
# price -- confirm that losing rows with missing x/y/z is intended.
BigDiamonds3 <- na.omit(BigDiamonds2)
# Number of rows discarded.
nrow(BigDiamonds2) - nrow(BigDiamonds3)
## [1] 4240
# Mean price per colour on the complete rows; no NA results this time.
summarize(BigDiamonds3$price, BigDiamonds3$color, FUN = mean)
## BigDiamonds3$color BigDiamonds3$price
## 1 D 8270.383
## 2 E 7294.028
## 3 F 8240.237
## 4 G 8990.918
## 5 H 9935.285
## 6 I 9536.633
## 7 J 9419.658
## 8 K 9706.482
## 9 L 7110.074
# Centre and spread of price on the complete-case data.
mean(BigDiamonds3[["price"]])
## [1] 8755.809
median(BigDiamonds3[["price"]])
## [1] 3503
sd(BigDiamonds3[["price"]])
## [1] 13022.11
# Sort a small numeric vector into ascending order.
a <- c(10, 44, 3, 66, 99, 1)
sort(a)
## [1] 1 3 10 44 66 99
# Minimal for-loop: print the integers 1 through 10, one per line.
for (i in seq_len(10)) {
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10