#install.packages("abc")
#install.packages('zoo')

# Base-R import of the diamonds data set from a local CSV file.
BigDiamonds <- read.csv(
  file = "C:/Users/ajaohri/Desktop/mentoring/BigDiamonds.csv"
)



# readr is faster than base import

# List the objects currently in the workspace.
ls()
## [1] "BigDiamonds"
# Drop the base-R copy of the data.
rm(BigDiamonds)

# A small throw-away vector, created only to demonstrate removal.
a <- c(1, 2, 3, 4)

# Remove just the object created above. rm(list = ls()) would also delete
# every unrelated object in the session of whoever sources this script;
# restart the R session instead when a completely clean workspace is needed.
rm(a)


library(readr)
# Import the same CSV with readr; the recorded output below shows the result
# is a tibble whose text columns are parsed as character.
BigDiamonds2 <- read_csv(
  file = "C:/Users/ajaohri/Desktop/mentoring/BigDiamonds.csv"
)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_integer(),
##   carat = col_double(),
##   cut = col_character(),
##   color = col_character(),
##   clarity = col_character(),
##   table = col_double(),
##   depth = col_double(),
##   cert = col_character(),
##   measurements = col_character(),
##   price = col_integer(),
##   x = col_double(),
##   y = col_double(),
##   z = col_double()
## )
# First six rows (the default for head()).
head(BigDiamonds2, n = 6)
## # A tibble: 6 x 13
##      X1 carat cut   color clarity table depth cert  measurements price
##   <int> <dbl> <chr> <chr> <chr>   <dbl> <dbl> <chr> <chr>        <int>
## 1     1  0.25 V.Go~ K     I1         59  63.7 GIA   3.96 x 3.95~    NA
## 2     2  0.23 Good  G     I1         61  58.1 GIA   4.00 x 4.05~    NA
## 3     3  0.34 Good  J     I2         58  58.7 GIA   4.56 x 4.53~    NA
## 4     4  0.21 V.Go~ D     I1         60  60.6 GIA   3.80 x 3.82~    NA
## 5     5  0.31 V.Go~ K     I1         59  62.2 EGL   4.35 x 4.26~    NA
## 6     6  0.2  Good  G     SI2        60  64.4 GIA   3.74 x 3.67~    NA
## # ... with 3 more variables: x <dbl>, y <dbl>, z <dbl>
# Last six rows (the default for tail()).
tail(BigDiamonds2, n = 6)
## # A tibble: 6 x 13
##       X1 carat cut   color clarity table depth cert  measurements price
##    <int> <dbl> <chr> <chr> <chr>   <dbl> <dbl> <chr> <chr>        <int>
## 1 598019  3.01 Ideal D     VS2      58    62   GIA   9.25 x 9.2 ~ 99920
## 2 598020  3.02 Ideal E     VVS2     58    59.8 HRD   9.43 x 9.51~ 99930
## 3 598021  5.01 V.Go~ I     VVS2     63.5  61.5 IGI   10.78  x  1~ 99942
## 4 598022  3.43 Ideal F     VS2      54    62.7 GIA   9.66 x 9.61~ 99960
## 5 598023  3.01 V.Go~ E     VS1      58    62.9 GIA   9.15 x 9.19~ 99966
## 6 598024  4.13 Ideal H     IF       56    62.5 IGI   10.27 x 10.~ 99990
## # ... with 3 more variables: x <dbl>, y <dbl>, z <dbl>
# First ten rows, requested explicitly.
head(BigDiamonds2, n = 10)
## # A tibble: 10 x 13
##       X1 carat cut   color clarity table depth cert  measurements price
##    <int> <dbl> <chr> <chr> <chr>   <dbl> <dbl> <chr> <chr>        <int>
##  1     1  0.25 V.Go~ K     I1       59    63.7 GIA   3.96 x 3.95~    NA
##  2     2  0.23 Good  G     I1       61    58.1 GIA   4.00 x 4.05~    NA
##  3     3  0.34 Good  J     I2       58    58.7 GIA   4.56 x 4.53~    NA
##  4     4  0.21 V.Go~ D     I1       60    60.6 GIA   3.80 x 3.82~    NA
##  5     5  0.31 V.Go~ K     I1       59    62.2 EGL   4.35 x 4.26~    NA
##  6     6  0.2  Good  G     SI2      60    64.4 GIA   3.74 x 3.67~    NA
##  7     7  0.2  Good  G     SI2      63    62.6 GIA   3.72 x 3.65~    NA
##  8     8  0.22 V.Go~ D     I1       61    59.2 GIA   3.95 x 3.97~    NA
##  9     9  0.23 V.Go~ K     SI2      57.5  63.6 IGI   3.87 x 3.90~    NA
## 10    10  0.2  Good  F     SI1      65    54.9 GIA   3.83 x 4.00~    NA
## # ... with 3 more variables: x <dbl>, y <dbl>, z <dbl>
# Compact structure listing: one line per column with its type and first values.
str(object = BigDiamonds2)
## Classes 'tbl_df', 'tbl' and 'data.frame':    598024 obs. of  13 variables:
##  $ X1          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ carat       : num  0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
##  $ cut         : chr  "V.Good" "Good" "Good" "V.Good" ...
##  $ color       : chr  "K" "G" "J" "D" ...
##  $ clarity     : chr  "I1" "I1" "I2" "I1" ...
##  $ table       : num  59 61 58 60 59 60 63 61 57.5 65 ...
##  $ depth       : num  63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
##  $ cert        : chr  "GIA" "GIA" "GIA" "GIA" ...
##  $ measurements: chr  "3.96 x 3.95 x 2.52" "4.00 x 4.05 x 2.30" "4.56 x 4.53 x 2.67" "3.80 x 3.82 x 2.31" ...
##  $ price       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ x           : num  3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
##  $ y           : num  3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
##  $ z           : num  2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 13
##   .. ..$ X1          : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ carat       : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ cut         : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ color       : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ clarity     : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ table       : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ depth       : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ cert        : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ measurements: list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ price       : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ x           : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ y           : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ z           : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"
# Structure of a single column, extracted as a plain vector.
str(BigDiamonds2[["depth"]])
##  num [1:598024] 63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
# Number of rows.
nrow(x = BigDiamonds2)
## [1] 598024
# Number of columns.
ncol(x = BigDiamonds2)
## [1] 13
# length() of a data frame is its number of columns.
length(x = BigDiamonds2)
## [1] 13
# Per-column summary of the whole table.
summary(object = BigDiamonds2)
##        X1             carat           cut               color          
##  Min.   :     1   Min.   :0.200   Length:598024      Length:598024     
##  1st Qu.:149507   1st Qu.:0.500   Class :character   Class :character  
##  Median :299013   Median :0.900   Mode  :character   Mode  :character  
##  Mean   :299013   Mean   :1.071                                        
##  3rd Qu.:448518   3rd Qu.:1.500                                        
##  Max.   :598024   Max.   :9.250                                        
##                                                                        
##    clarity              table           depth           cert          
##  Length:598024      Min.   : 0.00   Min.   : 0.00   Length:598024     
##  Class :character   1st Qu.:56.00   1st Qu.:61.00   Class :character  
##  Mode  :character   Median :58.00   Median :62.10   Mode  :character  
##                     Mean   :57.63   Mean   :61.06                     
##                     3rd Qu.:59.00   3rd Qu.:62.70                     
##                     Max.   :75.90   Max.   :81.30                     
##                                                                       
##  measurements           price             x                y         
##  Length:598024      Min.   :  300   Min.   : 0.150   Min.   : 1.000  
##  Class :character   1st Qu.: 1220   1st Qu.: 4.740   1st Qu.: 4.970  
##  Mode  :character   Median : 3503   Median : 5.780   Median : 6.050  
##                     Mean   : 8753   Mean   : 5.991   Mean   : 6.199  
##                     3rd Qu.:11174   3rd Qu.: 6.970   3rd Qu.: 7.230  
##                     Max.   :99990   Max.   :13.890   Max.   :13.890  
##                     NA's   :713     NA's   :1815     NA's   :1852    
##        z         
##  Min.   : 0.040  
##  1st Qu.: 3.120  
##  Median : 3.860  
##  Mean   : 4.033  
##  3rd Qu.: 4.610  
##  Max.   :13.180  
##  NA's   :2544
# Recode cut and color from character to factor so that summary() tabulates
# the levels instead of reporting only length/class/mode.
BigDiamonds2[["cut"]] <- as.factor(BigDiamonds2[["cut"]])
summary(BigDiamonds2[["cut"]])
##   Good  Ideal V.Good 
##  59680 369448 168896
BigDiamonds2[["color"]] <- as.factor(BigDiamonds2[["color"]])
summary(BigDiamonds2[["color"]])
##     D     E     F     G     H     I     J     K     L 
## 73630 93483 93573 96204 86619 70282 48709 25868  9656
# Rows 1-5 of the third column (cut); the result is still a tibble.
BigDiamonds2[1:5, 3]
## # A tibble: 5 x 1
##   cut   
##   <fct> 
## 1 V.Good
## 2 Good  
## 3 Good  
## 4 V.Good
## 5 V.Good
# Flag the columns that are still character. vapply() with a logical(1)
# template always returns a named logical vector, whereas sapply()'s return
# type depends on its input (for example a list when there are no columns).
ind <- vapply(BigDiamonds2, is.character, logical(1))
ind
##           X1        carat          cut        color      clarity 
##        FALSE        FALSE        FALSE        FALSE         TRUE 
##        table        depth         cert measurements        price 
##        FALSE        FALSE         TRUE         TRUE        FALSE 
##            x            y            z 
##        FALSE        FALSE        FALSE
# Preview only the flagged (character) columns.
head(BigDiamonds2[ind])
## # A tibble: 6 x 3
##   clarity cert  measurements      
##   <chr>   <chr> <chr>             
## 1 I1      GIA   3.96 x 3.95 x 2.52
## 2 I1      GIA   4.00 x 4.05 x 2.30
## 3 I2      GIA   4.56 x 4.53 x 2.67
## 4 I1      GIA   3.80 x 3.82 x 2.31
## 5 I1      EGL   4.35 x 4.26 x 2.68
## 6 SI2     GIA   3.74 x 3.67 x 2.38
# Convert every flagged column to a factor in one step.
# NOTE(review): this also converts `measurements`, which is close to free text
# and becomes a very high-cardinality factor; consider leaving it as character.
BigDiamonds2[ind] <- lapply(BigDiamonds2[ind], factor)

# Summarise the first eight columns; factor columns now show level counts.
summary(BigDiamonds2[1:8])
##        X1             carat           cut             color      
##  Min.   :     1   Min.   :0.200   Good  : 59680   G      :96204  
##  1st Qu.:149507   1st Qu.:0.500   Ideal :369448   F      :93573  
##  Median :299013   Median :0.900   V.Good:168896   E      :93483  
##  Mean   :299013   Mean   :1.071                   H      :86619  
##  3rd Qu.:448518   3rd Qu.:1.500                   D      :73630  
##  Max.   :598024   Max.   :9.250                   I      :70282  
##                                                   (Other):84233  
##     clarity           table           depth               cert       
##  SI1    :116631   Min.   : 0.00   Min.   : 0.00   GIA       :463555  
##  VS2    :111082   1st Qu.:56.00   1st Qu.:61.00   IGI       : 43667  
##  SI2    :104300   Median :58.00   Median :62.10   EGL       : 33814  
##  VS1    : 97730   Mean   :57.63   Mean   :61.06   EGL USA   : 16079  
##  VVS2   : 65500   3rd Qu.:59.00   3rd Qu.:62.70   EGL Intl. : 11447  
##  VVS1   : 54798   Max.   :75.90   Max.   :81.30   EGL ISRAEL: 11301  
##  (Other): 47983                                   (Other)   : 18161
# Repeat the sequence 1, 2, 3 five times.
rep(1:3, times = 5)
##  [1] 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3
# A small numeric vector with repeated values ...
ajay <- c(1, 2, 3, 1, 1, 2)
str(object = ajay)
##  num [1:6] 1 2 3 1 1 2
# ... converted to a factor: the distinct values become the levels.
ajay <- as.factor(x = ajay)
ajay
## [1] 1 2 3 1 1 2
## Levels: 1 2 3
# Structure listing again, to show the columns that are now factors.
str(object = BigDiamonds2)
## Classes 'tbl_df', 'tbl' and 'data.frame':    598024 obs. of  13 variables:
##  $ X1          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ carat       : num  0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
##  $ cut         : Factor w/ 3 levels "Good","Ideal",..: 3 1 1 3 3 1 1 3 3 1 ...
##  $ color       : Factor w/ 9 levels "D","E","F","G",..: 8 4 7 1 8 4 4 1 8 3 ...
##  $ clarity     : Factor w/ 9 levels "I1","I2","IF",..: 1 1 2 1 1 5 5 1 5 4 ...
##  $ table       : num  59 61 58 60 59 60 63 61 57.5 65 ...
##  $ depth       : num  63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
##  $ cert        : Factor w/ 9 levels "AGS","EGL","EGL Intl.",..: 6 6 6 6 2 6 6 6 8 6 ...
##  $ measurements: Factor w/ 241452 levels "0 x 0 x 3.19",..: 19589 21547 48106 15330 36974 14290 14029 19271 16744 15806 ...
##  $ price       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ x           : num  3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
##  $ y           : num  3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
##  $ z           : num  2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 13
##   .. ..$ X1          : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ carat       : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ cut         : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ color       : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ clarity     : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ table       : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ depth       : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ cert        : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ measurements: list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ price       : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ x           : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ y           : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ z           : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"
# Distinct values of cut, in order of first appearance.
unique(BigDiamonds2[["cut"]])
## [1] V.Good Good   Ideal 
## Levels: Good Ideal V.Good
# One-way frequency table of cut.
table(BigDiamonds2[["cut"]])
## 
##   Good  Ideal V.Good 
##  59680 369448 168896
# Two-way cross-tabulation: cut (rows) by color (columns).
table(BigDiamonds2[["cut"]], BigDiamonds2[["color"]])
##         
##              D     E     F     G     H     I     J     K     L
##   Good    6604  9733  9141  8923  7600  7380  5357  3467  1475
##   Ideal  45435 55547 58148 62067 56026 43000 29440 14729  5056
##   V.Good 21591 28203 26284 25214 22993 19902 13912  7672  3125
# Extracting the column first and then subsetting yields a factor vector ...
BigDiamonds2[["cut"]][1:5]
## [1] V.Good Good   Good   V.Good V.Good
## Levels: Good Ideal V.Good
class(BigDiamonds2[["cut"]][1:5])
## [1] "factor"
# ... whereas row/column indexing on the tibble yields a one-column tibble.
BigDiamonds2[1:5, 3]
## # A tibble: 5 x 1
##   cut   
##   <fct> 
## 1 V.Good
## 2 Good  
## 3 Good  
## 4 V.Good
## 5 V.Good
class(BigDiamonds2[1:5, 3])
## [1] "tbl_df"     "tbl"        "data.frame"
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
# Base summary of price; note the NA's count in the output.
summary(object = BigDiamonds2$price)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     300    1220    3503    8753   11174   99990     713
# Hmisc's richer description of the same column.
Hmisc::describe(BigDiamonds2$price)
## BigDiamonds2$price 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   597311      713    40312        1     8753    11059      612      736 
##      .25      .50      .75      .90      .95 
##     1220     3503    11174    22209    33665 
## 
## lowest :   300   301   302   303   304, highest: 99930 99942 99960 99966 99990
# Mean price per color. mean() is called without na.rm here, so a group
# containing a missing price comes back as NA.
Hmisc::summarize(BigDiamonds2$price, by = BigDiamonds2$color, FUN = mean)
##   BigDiamonds2$color BigDiamonds2$price
## 1                  D                 NA
## 2                  E                 NA
## 3                  F                 NA
## 4                  G                 NA
## 5                  H                 NA
## 6                  I                 NA
## 7                  J                 NA
## 8                  K                 NA
## 9                  L                 NA
# Row count before removing incomplete rows.
nrow(x = BigDiamonds2)
## [1] 598024
# Keep only rows with no missing value in any column.
BigDiamonds3 <- na.omit(object = BigDiamonds2)

# Number of rows dropped by na.omit().
nrow(x = BigDiamonds2) - nrow(x = BigDiamonds3)
## [1] 4240
# Mean price per color again, now on the complete rows only.
Hmisc::summarize(BigDiamonds3$price, by = BigDiamonds3$color, FUN = mean)
##   BigDiamonds3$color BigDiamonds3$price
## 1                  D           8270.383
## 2                  E           7294.028
## 3                  F           8240.237
## 4                  G           8990.918
## 5                  H           9935.285
## 6                  I           9536.633
## 7                  J           9419.658
## 8                  K           9706.482
## 9                  L           7110.074
# Centre and spread of price on the complete-case data.
mean(BigDiamonds3[["price"]])
## [1] 8755.809
median(BigDiamonds3[["price"]])
## [1] 3503
sd(BigDiamonds3[["price"]])
## [1] 13022.11
# order() returns the permutation that sorts its argument; indexing with it
# yields the values in ascending order.
a <- c(10, 44, 3, 66, 99, 1)
a[order(a, decreasing = FALSE)]
## [1]  1  3 10 44 66 99
# Minimal for-loop: print the integers 1 to 10, one per line.
for (i in seq_len(10)) {
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10