# Show what is currently defined in the global environment.
ls()
## character(0)
# Clear the workspace and trigger a garbage collection. This only removes
# objects: attached packages and options() survive, so restart R when a truly
# clean session is needed.
rm(list = ls())
gc()
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 294441  7.9     592000 15.9   460000 12.3
## Vcells 331836  2.6     786432  6.0   677529  5.2
# memory.size() / memory.limit() are Windows-only and, from R 4.2.0 on, are
# stubs that just warn and return Inf, so only query them where they still
# report real numbers.
if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
  print(memory.size())
  ## [1] 16.97
  print(memory.limit())
  ## [1] 1535
}
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
library(MASS)
# Load the Boston data frame shipped with MASS and inspect its structure.
data("Boston", package = "MASS")

str(Boston)
## 'data.frame':    506 obs. of  14 variables:
##  $ crim   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
##  $ zn     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
##  $ indus  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
##  $ chas   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nox    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
##  $ rm     : num  6.58 6.42 7.18 7 7.15 ...
##  $ age    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
##  $ dis    : num  4.09 4.97 4.97 6.06 6.06 ...
##  $ rad    : int  1 2 2 3 3 3 5 5 5 5 ...
##  $ tax    : num  296 242 242 222 222 222 311 311 311 311 ...
##  $ ptratio: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
##  $ black  : num  397 397 393 395 397 ...
##  $ lstat  : num  4.98 9.14 4.03 2.94 5.33 ...
##  $ medv   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
# Open the help page for the Boston data set.
help("Boston")
## starting httpd help server ... done
# Frequency of each distinct value in the rm column.
table(Boston[["rm"]])
## 
## 3.561 3.863 4.138 4.368 4.519 4.628 4.652  4.88 4.903 4.906 4.926 4.963 
##     1     1     2     1     1     1     1     1     1     1     1     1 
##  4.97 4.973     5 5.012 5.019 5.036 5.093 5.155 5.186 5.272 5.277 5.304 
##     1     1     1     1     1     1     1     1     1     1     1     2 
## 5.344 5.349 5.362  5.39 5.399 5.403 5.404 5.412 5.414 5.427 5.453 5.454 
##     1     1     1     2     1     1     2     1     1     1     1     1 
## 5.456 5.468  5.52 5.531 5.536  5.56 5.565 5.569  5.57 5.572 5.593 5.594 
##     1     1     1     1     1     1     1     1     1     1     1     1 
## 5.597 5.599 5.602 5.604 5.605 5.608 5.613 5.617 5.627 5.628 5.631 5.637 
##     1     1     1     1     1     1     1     1     1     1     1     1 
## 5.648 5.663  5.67 5.682 5.683 5.693 5.701 5.705 5.706 5.707 5.708 5.709 
##     1     1     1     1     1     1     1     1     1     1     1     1 
## 5.713 5.727 5.731 5.741 5.747 5.757 5.759 5.762 5.782 5.783 5.786 5.787 
##     3     1     1     1     1     2     1     1     1     1     1     1 
##  5.79 5.794 5.803 5.807 5.813 5.818 5.822 5.834 5.836 5.837 5.841  5.85 
##     1     1     1     1     2     1     1     1     1     1     1     1 
## 5.851 5.852 5.854 5.856 5.857 5.859 5.868 5.869  5.87 5.871 5.872 5.874 
##     1     1     2     2     1     1     1     1     1     1     1     1 
## 5.875 5.876 5.877 5.878 5.879  5.88 5.884 5.885 5.887 5.888 5.889 5.891 
##     2     1     1     1     1     1     1     1     1     2     1     1 
## 5.895 5.896 5.898 5.905 5.913 5.914  5.92 5.924 5.926 5.927 5.928 5.933 
##     1     1     1     1     1     1     1     1     2     1     1     1 
## 5.935 5.936 5.942 5.949  5.95 5.951 5.952 5.957  5.96 5.961 5.963 5.965 
##     2     2     1     1     1     1     1     1     1     2     1     1 
## 5.966 5.968 5.972 5.976 5.981 5.983 5.985 5.986 5.987  5.99 5.998 6.003 
##     2     1     1     1     1     2     1     1     1     1     1     1 
## 6.004 6.006 6.009 6.012 6.014 6.015 6.019  6.02 6.021 6.023 6.027  6.03 
##     2     1     2     1     1     1     1     1     1     1     1     2 
## 6.031 6.037 6.041 6.047 6.051 6.059 6.064 6.065 6.066 6.069 6.072 6.081 
##     1     1     1     1     1     1     1     1     1     1     1     1 
## 6.083 6.086 6.092 6.095 6.096 6.101 6.103 6.108 6.112 6.113 6.114 6.115 
##     1     1     1     1     2     1     1     2     1     1     1     1 
##  6.12 6.121 6.122 6.127 6.129  6.13 6.137  6.14 6.142 6.144 6.145 6.151 
##     1     1     2     3     1     1     1     1     1     2     1     1 
## 6.152 6.153 6.162 6.163 6.164 6.167 6.169 6.172 6.174 6.176 6.182 6.185 
##     2     1     2     1     1     3     1     1     1     1     1     2 
## 6.193 6.195 6.202 6.208 6.209 6.211 6.212 6.216 6.219 6.223 6.226 6.229 
##     2     1     1     1     2     2     1     1     1     1     1     3 
##  6.23 6.232  6.24 6.242 6.245 6.249  6.25 6.251 6.254 6.266 6.273 6.279 
##     1     1     1     1     1     1     1     2     1     1     1     1 
## 6.286  6.29 6.297 6.301 6.302  6.31 6.312 6.315 6.316 6.317 6.319 6.326 
##     1     1     1     1     1     1     2     2     1     1     1     2 
## 6.333 6.335 6.341 6.343 6.345 6.348 6.358 6.372 6.373 6.375 6.376 6.377 
##     1     1     1     1     1     1     1     1     1     1     2     1 
##  6.38 6.382 6.383 6.389 6.393 6.395 6.398 6.402 6.404 6.405 6.406 6.411 
##     2     1     1     1     1     1     1     1     1     3     1     1 
## 6.415 6.416 6.417 6.421 6.425 6.426  6.43 6.431 6.433 6.434 6.436 6.437 
##     1     1     3     1     1     1     1     2     1     1     1     1 
## 6.438 6.442 6.453 6.454 6.456 6.458 6.459 6.461 6.471 6.474 6.481 6.482 
##     1     1     1     1     1     1     1     1     1     1     1     1 
## 6.484 6.485 6.487  6.49 6.495  6.51 6.511 6.513 6.516 6.525 6.538  6.54 
##     1     1     1     1     2     1     1     1     1     1     1     1 
## 6.545 6.546 6.549 6.552 6.556 6.563 6.565 6.567 6.575 6.579  6.59 6.593 
##     1     1     1     1     1     1     1     1     1     1     1     1 
## 6.595 6.604 6.606 6.616 6.618 6.619 6.625 6.629  6.63 6.631 6.635 6.642 
##     1     1     1     1     1     1     1     1     2     1     2     1 
## 6.649 6.655 6.657 6.674 6.678 6.683 6.696 6.701 6.715 6.718 6.726 6.727 
##     1     1     1     1     1     1     1     1     1     1     1     2 
## 6.728 6.739 6.749  6.75 6.758 6.762  6.77 6.781 6.782 6.794   6.8 6.812 
##     2     1     1     1     1     1     1     1     2     2     1     1 
## 6.816 6.824 6.826 6.833 6.842 6.849 6.852 6.854  6.86 6.861 6.871 6.874 
##     1     1     1     1     1     1     1     1     1     1     1     1 
## 6.879 6.897 6.939 6.943 6.951 6.957 6.968 6.975 6.976  6.98 6.982 6.998 
##     1     1     1     1     2     1     2     1     1     2     1     1 
## 7.007 7.014 7.016 7.024 7.041 7.061 7.079 7.088 7.104 7.107 7.135 7.147 
##     1     1     1     1     1     1     1     1     1     1     1     1 
## 7.148 7.155 7.163 7.178 7.185 7.203 7.206 7.236 7.241 7.249 7.267 7.274 
##     1     1     1     1     2     1     1     1     1     1     1     1 
## 7.287 7.313 7.327 7.333 7.358 7.393 7.412 7.416  7.42 7.454  7.47 7.489 
##     1     1     1     1     1     1     1     1     1     1     1     1 
##  7.52  7.61 7.645 7.686 7.691 7.765 7.802  7.82 7.831 7.853 7.875 7.923 
##     1     1     1     1     1     1     1     2     1     1     1     1 
## 7.929 8.034  8.04 8.069 8.247 8.259 8.266 8.297 8.337 8.375 8.398 8.704 
##     1     1     1     1     1     1     1     1     1     1     1     1 
## 8.725  8.78 
##     1     1
# Box plot of the rm column.
boxplot(Boston$rm)

# Columns are referenced explicitly instead of via attach(Boston): attaching
# would put every column on the search path, including one named `rm`, which
# makes later bare names ambiguous and is easy to leave attached by accident.
mean(Boston$medv)
## [1] 22.53281
summary(Boston$medv)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   17.02   21.20   22.53   25.00   50.00
summary(Boston$rm)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.561   5.886   6.208   6.285   6.624   8.780
# Select the medv and rm columns for every Boston row whose rm exceeds 7.
sqldf("select medv,rm from Boston where rm>7")
## Loading required package: tcltk
##    medv    rm
## 1  34.7 7.185
## 2  36.2 7.147
## 3  34.9 7.024
## 4  35.4 7.249
## 5  33.0 7.104
## 6  23.6 7.007
## 7  28.7 7.079
## 8  38.7 8.069
## 9  43.8 7.820
## 10 33.2 7.416
## 11 50.0 7.489
## 12 50.0 7.802
## 13 50.0 8.375
## 14 50.0 7.929
## 15 39.8 7.765
## 16 37.9 7.155
## 17 50.0 7.831
## 18 34.9 7.185
## 19 36.4 7.178
## 20 50.0 7.875
## 21 33.3 7.287
## 22 30.3 7.107
## 23 34.6 7.274
## 24 32.9 7.135
## 25 42.3 7.610
## 26 48.5 7.853
## 27 50.0 8.034
## 28 44.8 8.266
## 29 50.0 8.725
## 30 37.6 8.040
## 31 31.6 7.163
## 32 46.7 7.686
## 33 31.7 7.412
## 34 41.7 8.337
## 35 48.3 8.247
## 36 31.5 7.358
## 37 42.8 8.259
## 38 44.0 7.454
## 39 50.0 8.704
## 40 36.0 7.333
## 41 33.8 7.203
## 42 43.1 7.520
## 43 48.8 8.398
## 44 31.0 7.327
## 45 36.5 7.206
## 46 30.7 7.014
## 47 50.0 8.297
## 48 43.5 7.470
## 49 35.2 7.691
## 50 33.2 7.267
## 51 45.4 7.820
## 52 46.0 7.645
## 53 50.0 7.923
## 54 32.2 7.088
## 55 37.3 7.148
## 56 29.0 7.041
## 57 36.1 7.236
## 58 33.4 7.420
## 59 32.7 7.241
## 60 21.9 8.780
## 61 50.0 7.016
## 62 15.0 7.313
## 63 17.8 7.393
## 64 25.0 7.061
# Among rows with rm > 7, average medv and rm separately for each chas value.
sqldf("select avg(medv),
              avg(rm),
                chas 
      from 
                Boston
      where     rm>7 
      group by
                chas")
##   avg(medv)  avg(rm) chas
## 1  37.87679 7.535482    0
## 2  42.03750 7.812375    1
# CHARACTER TO NUMERIC
# c() coerces mixed input to one type, so the unquoted 50000 becomes "50000".
money <- c(50000, "50000", "$50000", "50,000", "$50,000")

# Averaging the raw character vector only yields NA plus a warning.

mean(money)
## Warning in mean.default(money): argument is not numeric or logical:
## returning NA
## [1] NA
str(money)
##  chr [1:5] "50000" "50000" "$50000" "50,000" "$50,000"
# A direct conversion fails for the entries that contain "$" or ",".
money2 <- as.numeric(money)
## Warning: NAs introduced by coercion
money2
## [1] 50000 50000    NA    NA    NA
# Strip the thousands separators, then the dollar signs, then convert.
money2 <- gsub(",", "", money)
money3 <- gsub("\\$", "", money2)
money3
## [1] "50000" "50000" "50000" "50000" "50000"
money3 <- as.numeric(money3)
mean(money3)
## [1] 50000
# CHARACTER TO DATES

dobofclass <- c(
  "1April2007",
  "28th july 1984",
  "05 May 1988",
  "29nov-2008"
)



# The format string has to mirror the layout of the input text.
strptime("29nov-2008", format = "%d%b-%Y")
## [1] "2008-11-29 IST"
strptime("05 May 1988", format = "%d%b-%Y")
## [1] NA
strptime("05 May 1988", format = "%d %B %Y")
## [1] "1988-05-05 IST"
library(lubridate)

# lubridate's dmy() parses all four differently formatted strings in one call.
dmy(dobofclass)
## [1] "2007-04-01 UTC" "1984-07-28 UTC" "1988-05-05 UTC" "2008-11-29 UTC"
# Today's date.
Sys.Date()
## [1] "2015-12-12"
# difftime() gives the gap between dates (here: today minus each parsed date).
difftime(time1 = Sys.Date(), time2 = dmy(dobofclass))
## Time differences in days
## [1]  3177 11459 10082  2569
help("strptime")
# Converting to character
x <- c(23, 56, 78, 89)
as.character(x)
## [1] "23" "56" "78" "89"
# paste() on a single vector also returns its elements as strings.
paste(x)
## [1] "23" "56" "78" "89"
# paste() joins its arguments with a single space by default.
paste("ajay", dobofclass[[1]])
## [1] "ajay 1April2007"
paste("student1", dobofclass[[2]])
## [1] "student1 28th july 1984"
# Characters 2 to 3 of a single string.
substr("ajay", start = 2, stop = 3)
## [1] "ja"
namclass <- c("Ajay", "Ajith", "Sudeeptha", "Yogisha")
# First initial of every member of namclass, i.e. A, A, S, Y.
substr(namclass, start = 1, stop = 1)
## [1] "A" "A" "S" "Y"
nchar(namclass)
## [1] 4 5 9 7
# Last letter of every member of namclass: start and stop both sit on the
# final character position.
substr(namclass, start = nchar(namclass), stop = nchar(namclass)) # MODIFY THIS
## [1] "y" "h" "a" "a"
# Load the mtcars data set and preview its first six rows.
data("mtcars", package = "datasets")
library(sqldf)
head(mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Average mpg: overall, then per cyl, then per (cyl, gear) combination.
# NOTE(review): the original heading asked for "every carb and every cyl", but
# no query below groups by carb - confirm whether gear or carb was intended.

sqldf("select avg(mpg)  from mtcars ")
##   avg(mpg)
## 1 20.09062
sqldf("select avg(mpg),cyl from mtcars group by cyl")
##   avg(mpg) cyl
## 1 26.66364   4
## 2 19.74286   6
## 3 15.10000   8
sqldf("select avg(mpg),cyl,gear from mtcars group by cyl,gear")
##   avg(mpg) cyl gear
## 1   21.500   4    3
## 2   26.925   4    4
## 3   28.200   4    5
## 4   19.750   6    3
## 5   19.750   6    4
## 6   19.700   6    5
## 7   15.050   8    3
## 8   15.400   8    5
# Current working directory.
getwd()
## [1] "C:/Users/dell/Desktop/Teaching"
# List the CSV files on the Desktop. `pattern` is a regular expression, so the
# dot is escaped and the match is anchored at the end of the name; the earlier
# unanchored ".csv" also picked up "BigDiamonds.csv.zip".
dir("C:/Users/dell/Desktop/", pattern = "\\.csv$")
## [1] "Analytics decisionstats.com Audience Overview 20110617-20120717.csv"
## [2] "BigDiamonds.csv"                                                    
## [3] "Boston.csv"                                                         
## [4] "ccFraud.csv"                                                        
## [5] "test.csv"
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, mday, month, quarter, wday, week, yday, year
# Read the BigDiamonds CSV into a data.table. The progress messages printed by
# fread() are kept as output comments so the file remains valid R.
diamonds <- fread("C:/Users/dell/Desktop/BigDiamonds.csv")
## 
## Read 1.7% of 598024 rows
## Read 16.7% of 598024 rows
## Read 38.5% of 598024 rows
## Read 53.5% of 598024 rows
## Read 63.5% of 598024 rows
## Read 68.6% of 598024 rows
## Read 93.6% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:12
# Overview of the data.tables currently in memory (name, size, columns, key).
tables()
##      NAME        NROW NCOL MB
## [1,] diamonds 598,024   13 75
##      COLS                                                                
## [1,] V1,carat,cut,color,clarity,table,depth,cert,measurements,price,x,y,z
##      KEY
## [1,]    
## Total: 75MB
library("sqldf")
# Overall mean price: SQL first, then the data.table equivalent.
sqldf("select avg(price) from diamonds") # 1
##   avg(price)
## 1   8753.018
diamonds[, mean(price, na.rm = TRUE)]
## [1] 8753.018
# Overall mean carat, the same two ways.
sqldf("select avg(carat) from diamonds") # 2
##   avg(carat)
## 1   1.071297
# data.table syntax is DT[i, j, by]: i filters rows (SQL WHERE), j computes on
# columns (SQL SELECT), by groups (SQL GROUP BY).
diamonds[, mean(carat, na.rm = TRUE)]
## [1] 1.071297
# Mean carat per colour: SQL GROUP BY, then data.table's `by`.
sqldf("select 
        avg(carat),color 
      from diamonds 
      group by color") # 3
##   avg(carat) color
## 1  0.8266182     D
## 2  0.8318824     E
## 3  0.9410532     F
## 4  1.0638408     G
## 5  1.2099407     H
## 6  1.2712823     I
## 7  1.3475399     J
## 8  1.4950646     K
## 9  1.3632705     L
diamonds[, mean(carat, na.rm = TRUE), by = color]
##    color        V1
## 1:     K 1.4950646
## 2:     G 1.0638408
## 3:     J 1.3475399
## 4:     D 0.8266182
## 5:     F 0.9410532
## 6:     E 0.8318824
## 7:     H 1.2099407
## 8:     I 1.2712823
## 9:     L 1.3632705
# Mean carat per cut: SQL GROUP BY, then data.table's `by`.
sqldf("select
      avg(carat),cut 
      from diamonds group by cut") # 4
##   avg(carat)    cut
## 1  0.9003031   Good
## 2  1.1201936  Ideal
## 3  1.0247597 V.Good
diamonds[, mean(carat), by = cut]
##       cut        V1
## 1: V.Good 1.0247597
## 2:   Good 0.9003031
## 3:  Ideal 1.1201936
# For stones above 3 carats: mean carat (V1) and mean price (V2) per cut.
diamonds[
  carat > 3,
  list(mean(carat), mean(price, na.rm = TRUE)),
  by = cut
]
##       cut       V1       V2
## 1:   Good 3.675146 43480.94
## 2: V.Good 3.616123 45957.94
## 3:  Ideal 3.548771 49433.78
# Average price of stones above 4 carats: colour K only, then every colour.
diamonds[carat > 4 & color == "K", mean(price, na.rm = TRUE)]
## [1] 55531.86
diamonds[carat > 4, mean(price, na.rm = TRUE), by = color]
##    color       V1
## 1:     D 55053.01
## 2:     E 55229.97
## 3:     G 58614.04
## 4:     K 55531.86
## 5:     J 60725.33
## 6:     I 65464.33
## 7:     H 62630.72
## 8:     F 59957.27
## 9:     L 49344.77
# Same for stones below 3 carats: colour J only, then every colour.
diamonds[carat < 3 & color == "J", mean(price, na.rm = TRUE)]
## [1] 5767.972
diamonds[carat < 3, mean(price, na.rm = TRUE), by = color]
##    color       V1
## 1:     K 5721.485
## 2:     G 7129.863
## 3:     J 5767.972
## 4:     D 7794.984
## 5:     F 6931.228
## 6:     E 6580.012
## 7:     H 7291.754
## 8:     I 6809.520
## 9:     L 3932.475
# Below 4.5 carats: one (colour, cut) cell, then every (colour, cut) pair.
diamonds[
  carat < 4.5 & color == "K" & cut == "Ideal",
  mean(price, na.rm = TRUE)
]
## [1] 10046.97
diamonds[carat < 4.5, mean(price, na.rm = TRUE), by = .(color, cut)]
##     color    cut        V1
##  1:     K V.Good  7598.953
##  2:     G   Good  5611.800
##  3:     J   Good  5038.752
##  4:     D V.Good  6364.563
##  5:     F   Good  4919.152
##  6:     F V.Good  6908.591
##  7:     G V.Good  7866.847
##  8:     J V.Good  7281.392
##  9:     E   Good  4156.141
## 10:     D   Good  4644.282
## 11:     E  Ideal  8297.600
## 12:     E V.Good  6210.817
## 13:     H V.Good  7859.716
## 14:     F  Ideal  9183.598
## 15:     H   Good  5839.827
## 16:     I   Good  5096.376
## 17:     K   Good  4935.032
## 18:     I V.Good  7493.896
## 19:     L   Good  4138.479
## 20:     I  Ideal 10361.547
## 21:     G  Ideal  9653.360
## 22:     D  Ideal  9605.492
## 23:     J  Ideal  9676.903
## 24:     H  Ideal 10870.121
## 25:     L V.Good  5259.427
## 26:     K  Ideal 10046.967
## 27:     L  Ideal  7059.294
##     color    cut        V1
# Mean price of stones priced above 3000, per (colour, cut) pair.
diamonds[price > 3000, mean(price, na.rm = TRUE), by = .(color, cut)]
##     color    cut        V1
##  1:     G V.Good 13912.689
##  2:     F  Ideal 16560.190
##  3:     J  Ideal 15106.234
##  4:     G  Ideal 16127.682
##  5:     E   Good  9562.663
##  6:     I   Good 10568.739
##  7:     I V.Good 13373.244
##  8:     E V.Good 12512.359
##  9:     H V.Good 13546.145
## 10:     G   Good 11217.941
## 11:     F V.Good 13153.974
## 12:     E  Ideal 16549.568
## 13:     I  Ideal 15601.083
## 14:     H  Ideal 16766.347
## 15:     D  Ideal 17991.784
## 16:     D V.Good 13063.586
## 17:     J   Good 10946.462
## 18:     K  Ideal 15217.389
## 19:     F   Good 11114.473
## 20:     H   Good 11490.488
## 21:     K V.Good 15361.029
## 22:     J V.Good 14305.686
## 23:     D   Good 10355.956
## 24:     L  Ideal 12607.748
## 25:     L V.Good 13453.077
## 26:     K   Good 12485.608
## 27:     L   Good 13843.234
##     color    cut        V1
# Single cell of the table above: colour I, cut V.Good, price above 3000.
diamonds[
  price > 3000 & color == "I" & cut == "V.Good",
  mean(price, na.rm = TRUE)
]
## [1] 13373.24
library(Hmisc)
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## 
## The following object is masked _by_ '.GlobalEnv':
## 
##     diamonds
## 
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units

# Price distribution, including the count of missing values.
summary(diamonds$price)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     300    1220    3503    8753   11170   99990     713
# describe() reports further quantiles plus the lowest and highest values.
describe(diamonds$price)
## diamonds$price 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##  597311     713   40312       1    8753     612     736    1220    3503 
##     .75     .90     .95 
##   11174   22209   33665 
## 
## lowest :   300   301   302   303   304
## highest: 99930 99942 99960 99966 99990
# Drop every row containing a missing value, then take the maximum price for
# each (cut, color) combination.
diamonds2 <- na.omit(diamonds)
summarize(
  diamonds2$price,
  by = llist(diamonds2$cut, diamonds2$color),
  FUN = max
)
##    diamonds2$cut diamonds2$color diamonds2$price
## 1           Good               D           97164
## 2           Good               E           99920
## 3           Good               F           99630
## 4           Good               G           96536
## 5           Good               H           99359
## 6           Good               I           93780
## 7           Good               J           99806
## 8           Good               K           96073
## 9           Good               L           68793
## 10         Ideal               D           99920
## 11         Ideal               E           99930
## 12         Ideal               F           99960
## 13         Ideal               G           99930
## 14         Ideal               H           99990
## 15         Ideal               I           99910
## 16         Ideal               J           99630
## 17         Ideal               K           99690
## 18         Ideal               L           95814
## 19        V.Good               D           99870
## 20        V.Good               E           99966
## 21        V.Good               F           99890
## 22        V.Good               G           99472
## 23        V.Good               H           99810
## 24        V.Good               I           99942
## 25        V.Good               J           98273
## 26        V.Good               K           98934
## 27        V.Good               L           95253
#install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:Hmisc':
## 
##     combine, src, summarize
## 
## The following objects are masked from 'package:data.table':
## 
##     between, last
## 
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## 
## The following object is masked from 'package:MASS':
## 
##     select
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the four columns needed for the price-per-carat calculation.
diamonds3 <- select(diamonds, price, carat, cut, color)
diamonds3
##         price carat    cut color
##      1:    NA  0.25 V.Good     K
##      2:    NA  0.23   Good     G
##      3:    NA  0.34   Good     J
##      4:    NA  0.21 V.Good     D
##      5:    NA  0.31 V.Good     K
##     ---                         
## 598020: 99930  3.02  Ideal     E
## 598021: 99942  5.01 V.Good     I
## 598022: 99960  3.43  Ideal     F
## 598023: 99966  3.01 V.Good     E
## 598024: 99990  4.13  Ideal     H
# transmute() returns only the newly computed column.
diamonds4 <- transmute(diamonds3, prbycarat = price / carat)
diamonds4
##         prbycarat
##      1:        NA
##      2:        NA
##      3:        NA
##      4:        NA
##      5:        NA
##     ---          
## 598020:  33089.40
## 598021:  19948.50
## 598022:  29142.86
## 598023:  33211.30
## 598024:  24210.65
# Draw 600 random rows without replacement.
sample_n(diamonds, size = 600, replace = FALSE)
##          V1 carat    cut color clarity table depth      cert
##   1: 194441  0.59  Ideal     J      IF    57  61.9       GIA
##   2: 162462  0.60   Good     F     SI2    58  64.7       GIA
##   3:  99624  0.31  Ideal     F    VVS2    57  62.6       GIA
##   4: 379140  1.20  Ideal     F     VS2    57  62.7 EGL Intl.
##   5: 184421  0.60 V.Good     J     VS1    57  63.2       GIA
##  ---                                                        
## 596: 554867  1.52  Ideal     G    VVS1    57  62.2       GIA
## 597: 556055  3.08  Ideal     J     SI2    57  62.5       EGL
## 598: 593859  4.67   Good     F     SI2    60  59.2       GIA
## 599:  16597  0.35  Ideal     K     SI1    57  62.4       GIA
## 600: 392909  0.90  Ideal     G     VS1    57  62.5       GIA
##                  measurements price     x     y    z
##   1:       5.38 x 5.36 x 3.33  1681  5.38  5.36 3.33
##   2:       5.26 x 5.27 x 3.41  1333  5.26  5.27 3.41
##   3:       4.33 x 4.31 x 2.71   890  4.33  4.31 2.71
##   4:       6.74 x 6.78 x 4.24  6110  6.74  6.78 4.24
##   5:       5.37 x 5.29 x 3.37  1566  5.37  5.29 3.37
##  ---                                                
## 596:       7.36 x 7.33 x 4.57 18878  7.36  7.33 4.57
## 597:       9.28 x 9.24 x 5.78 19210  9.28  9.24 5.78
## 598: 10.69  x  10.87  x  6.40 63272 10.69 10.87 6.40
## 599:       4.53 x 4.51 x 2.82   540  4.53  4.51 2.82
## 600:       6.17 x 6.12 x 3.84  6767  6.17  6.12 3.84
# Draw a random 1% of the rows.
sample_frac(diamonds, size = 0.01)
##           V1 carat    cut color clarity table depth cert
##    1: 577674  2.02  Ideal     G     VS1    57  62.3  GIA
##    2: 230810  0.60 V.Good     D     SI1    57  63.8  GIA
##    3:  33973  0.23  Ideal     E    VVS1    57  61.2  GIA
##    4: 221946  0.70  Ideal     F     SI2    57  62.1  IGI
##    5: 367651  1.41 V.Good     H     SI1    59  63.5  EGL
##   ---                                                   
## 5976:  94131  0.37  Ideal     E     VS2    59  60.4  GIA
## 5977: 431896  1.60  Ideal     H     VS1    57  62.4  EGL
## 5978:   7494  0.33 V.Good     F      I1    57  62.8  GIA
## 5979: 373729  0.90 V.Good     F    VVS2    56  62.8  GIA
## 5980: 498128  2.01 V.Good     H     VS1    56  64.3  GIA
##                 measurements price    x    y    z
##    1:     8.07 x 8.12 x 5.04 30109 8.07 8.12 5.04
##    2:     5.33 x 5.31 x 3.39  2130 5.33 5.31 3.39
##    3:     3.96 x 3.98 x 2.43   630 3.96 3.98 2.43
##    4:     5.66 x 5.69 x 3.53  2020 5.66 5.69 3.53
##    5:     7.03 x 6.99 x 4.45  5620 7.03 6.99 4.45
##   ---                                            
## 5976:     4.64 x 4.66 x 2.81   866 4.64 4.66 2.81
## 5977:     4.66 x 7.46 x 7.48  9424 4.66 7.46 7.48
## 5978:     4.42 x 4.44 x 2.78   470 4.42 4.44 2.78
## 5979:     6.15 x 6.11 x 3.85  5879 6.15 6.11 3.85
## 5980: 7.89  x  7.96  x  5.09 22896 7.89 7.96 5.09
# vignette("introduction", package = "dplyr")
# Using the vignette and summarise(), repeat the data.table queries in dplyr.


# Mean price per colour. `na.rm = T` is kept verbatim because the unnamed
# summary column is labelled with the deparsed call.
dcolor <- group_by(diamonds, color)
summarise(dcolor, mean(price, na.rm = T))
## Source: local data table [9 x 2]
## 
##   color mean(price, na.rm = T)
## 1     K               9694.257
## 2     G               8984.200
## 3     J               9423.581
## 4     D               8266.346
## 5     F               8234.730
## 6     E               7282.990
## 7     H               9941.795
## 8     I               9541.319
## 9     L               7109.228
# Mean price per colour for stones above 4 carats (filter, group, summarise).
diamondstest <- filter(diamonds, carat > 4)
dcolor <- group_by(diamondstest, color)
summarise(dcolor, mean(price, na.rm = T))
## Source: local data table [9 x 2]
## 
##   color mean(price, na.rm = T)
## 1     D               55053.01
## 2     E               55229.97
## 3     G               58614.04
## 4     K               55531.86
## 5     J               60725.33
## 6     I               65464.33
## 7     H               62630.72
## 8     F               59957.27
## 9     L               49344.77
# Colour J below 3 carats.
diamondstest2 <- filter(diamonds, carat < 3 & color == "J")
dcolor2 <- group_by(diamondstest2, color)
summarise(dcolor2, mean(price, na.rm = T))
## Source: local data table [1 x 2]
## 
##   color mean(price, na.rm = T)
## 1     J               5767.972
# Colour K, Ideal cut, below 4.5 carats.
diamondstest3 <- filter(diamonds, carat < 4.5 & color == "K" & cut == "Ideal")
dcolor3 <- group_by(diamondstest3, color)
summarise(dcolor3, mean(price, na.rm = T))
## Source: local data table [1 x 2]
## 
##   color mean(price, na.rm = T)
## 1     K               10046.97
# Colour J, Ideal cut, below 3 carats - grouped by cut this time.
diamondstest4 <- filter(diamonds, carat < 3 & color == "J" & cut == "Ideal")
dcut2 <- group_by(diamondstest4, cut)
summarise(dcut2, mean(price, na.rm = T))
## Source: local data table [1 x 2]
## 
##     cut mean(price, na.rm = T)
## 1 Ideal               6639.536
# Colour I, V.Good cut, priced above 3000. Note that this overwrites the
# diamondstest4 and dcut2 objects created by the previous query.
diamondstest4 <- filter(
  diamonds,
  price > 3000 & color == "I" & cut == "V.Good"
)
dcut2 <- group_by(diamondstest4, cut)
summarise(dcut2, mean(price, na.rm = T))
## Source: local data table [1 x 2]
## 
##      cut mean(price, na.rm = T)
## 1 V.Good               13373.24
# data.table version: mean price above 3000 for every (colour, cut) pair.
diamonds[price > 3000, mean(price, na.rm = TRUE), by = .(color, cut)]
##     color    cut        V1
##  1:     G V.Good 13912.689
##  2:     F  Ideal 16560.190
##  3:     J  Ideal 15106.234
##  4:     G  Ideal 16127.682
##  5:     E   Good  9562.663
##  6:     I   Good 10568.739
##  7:     I V.Good 13373.244
##  8:     E V.Good 12512.359
##  9:     H V.Good 13546.145
## 10:     G   Good 11217.941
## 11:     F V.Good 13153.974
## 12:     E  Ideal 16549.568
## 13:     I  Ideal 15601.083
## 14:     H  Ideal 16766.347
## 15:     D  Ideal 17991.784
## 16:     D V.Good 13063.586
## 17:     J   Good 10946.462
## 18:     K  Ideal 15217.389
## 19:     F   Good 11114.473
## 20:     H   Good 11490.488
## 21:     K V.Good 15361.029
## 22:     J V.Good 14305.686
## 23:     D   Good 10355.956
## 24:     L  Ideal 12607.748
## 25:     L V.Good 13453.077
## 26:     K   Good 12485.608
## 27:     L   Good 13843.234
##     color    cut        V1
# data.table version of the single cell: colour I, V.Good cut, price > 3000.
diamonds[
  price > 3000 & color == "I" & cut == "V.Good",
  mean(price, na.rm = TRUE)
]
## [1] 13373.24