# Objects currently defined in the workspace (none at the start).
ls()
## character(0)
# Memory currently in use by R (memory.size() is a Windows-specific call).
memory.size()
## [1] 14.29
# Remove every object from the workspace, then trigger garbage collection.
rm(list = ls())
gc()
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 290908  7.8     592000 15.9   350000  9.4
## Vcells 321510  2.5     786432  6.0   677388  5.2
# Report the working directory, set it explicitly, and list its contents.
getwd()
## [1] "C:/Users/dell/Desktop"
setwd(dir = "C:/Users/dell/Desktop")
dir()
##  [1] "1.png"                                                              
##  [2] "2.png"                                                              
##  [3] "5128OS_09_01.jpg"                                                   
##  [4] "adult.data.txt"                                                     
##  [5] "airline.sas7bdat"                                                   
##  [6] "Analytics decisionstats.com Audience Overview 20110617-20120717.csv"
##  [7] "basicR.html"                                                        
##  [8] "basicR.R"                                                           
##  [9] "basicR.spin.R"                                                      
## [10] "basicR.spin.Rmd"                                                    
## [11] "BigDiamonds.csv"                                                    
## [12] "BigDiamonds.csv.zip"                                                
## [13] "Boston.csv"                                                         
## [14] "Cars.sav"                                                           
## [15] "casestudy"                                                          
## [16] "ccFraud.csv"                                                        
## [17] "Certificate Doc.docx"                                               
## [18] "CmapServer Download _ Cmap.html"                                    
## [19] "CmapServer Download _ Cmap_files"                                   
## [20] "Coxcombs.jpg"                                                       
## [21] "cricketparsing.R"                                                   
## [22] "data input.R"                                                       
## [23] "data_input.html"                                                    
## [24] "datatable"                                                          
## [25] "day8 session 4.fbr"                                                 
## [26] "day9 session 1.fbr"                                                 
## [27] "desktop.ini"                                                        
## [28] "Dropbox.lnk"                                                        
## [29] "exam.html"                                                          
## [30] "exam.R"                                                             
## [31] "ie_data.xls"                                                        
## [32] "lastsave.txt"                                                       
## [33] "lastsave2"                                                          
## [34] "library.docx"                                                       
## [35] "Minard.png"                                                         
## [36] "modules"                                                            
## [37] "modules.zip"                                                        
## [38] "my first code.R"                                                    
## [39] "mycode.docx"                                                        
## [40] "mycode.html"                                                        
## [41] "mycode.R"                                                           
## [42] "myfirstRcode.R"                                                     
## [43] "New folder"                                                         
## [44] "new1"                                                               
## [45] "Quiz 1 R.docx"                                                      
## [46] "rfmanalysis2.html"                                                  
## [47] "rfmanalysis2.R"                                                     
## [48] "rsconnect"                                                          
## [49] "SnowMap_Points.png"                                                 
## [50] "test.csv"                                                           
## [51] "Untitled (3).wma"                                                   
## [52] "Untitled (3).wma.wav"                                               
## [53] "Untitled 88.wma"
library(data.table)
# Read the diamonds CSV with data.table's fread(). The progress messages it
# printed are kept below as "## " output comments so that this file still
# parses as R (they were previously pasted in as bare text).
BigDiamonds <- fread("C:/Users/dell/Desktop/BigDiamonds.csv")
## 
## Read 21.7% of 598024 rows
## Read 45.1% of 598024 rows
## Read 56.9% of 598024 rows
## Read 66.9% of 598024 rows
## Read 88.6% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:08
# Compact overview of each column's type and first few values.
str(BigDiamonds)
## Classes 'data.table' and 'data.frame':   598024 obs. of  13 variables:
##  $ V1          : chr  "1" "2" "3" "4" ...
##  $ carat       : num  0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
##  $ cut         : chr  "V.Good" "Good" "Good" "V.Good" ...
##  $ color       : chr  "K" "G" "J" "D" ...
##  $ clarity     : chr  "I1" "I1" "I2" "I1" ...
##  $ table       : num  59 61 58 60 59 60 63 61 57.5 65 ...
##  $ depth       : num  63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
##  $ cert        : chr  "GIA" "GIA" "GIA" "GIA" ...
##  $ measurements: chr  "3.96 x 3.95 x 2.52" "4.00 x 4.05 x 2.30" "4.56 x 4.53 x 2.67" "3.80 x 3.82 x 2.31" ...
##  $ price       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ x           : num  3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
##  $ y           : num  3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
##  $ z           : num  2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Number of rows and columns, returned together.
dim(BigDiamonds)
## [1] 598024     13
# The object read by fread() is a data.table and also a data.frame.
class(BigDiamonds)
## [1] "data.table" "data.frame"
# Row count and column count on their own.
nrow(BigDiamonds)
## [1] 598024
ncol(BigDiamonds)
## [1] 13
# Per-column summary: quantiles and NA counts for numeric columns,
# length/class/mode for character columns.
summary(BigDiamonds)
##       V1                carat           cut               color          
##  Length:598024      Min.   :0.200   Length:598024      Length:598024     
##  Class :character   1st Qu.:0.500   Class :character   Class :character  
##  Mode  :character   Median :0.900   Mode  :character   Mode  :character  
##                     Mean   :1.071                                        
##                     3rd Qu.:1.500                                        
##                     Max.   :9.250                                        
##                                                                          
##    clarity              table           depth           cert          
##  Length:598024      Min.   : 0.00   Min.   : 0.00   Length:598024     
##  Class :character   1st Qu.:56.00   1st Qu.:61.00   Class :character  
##  Mode  :character   Median :58.00   Median :62.10   Mode  :character  
##                     Mean   :57.63   Mean   :61.06                     
##                     3rd Qu.:59.00   3rd Qu.:62.70                     
##                     Max.   :75.90   Max.   :81.30                     
##                                                                       
##  measurements           price             x                y         
##  Length:598024      Min.   :  300   Min.   : 0.150   Min.   : 1.000  
##  Class :character   1st Qu.: 1220   1st Qu.: 4.740   1st Qu.: 4.970  
##  Mode  :character   Median : 3503   Median : 5.780   Median : 6.050  
##                     Mean   : 8753   Mean   : 5.991   Mean   : 6.199  
##                     3rd Qu.:11174   3rd Qu.: 6.970   3rd Qu.: 7.230  
##                     Max.   :99990   Max.   :13.890   Max.   :13.890  
##                     NA's   :713     NA's   :1815     NA's   :1852    
##        z         
##  Min.   : 0.040  
##  1st Qu.: 3.120  
##  Median : 3.860  
##  Mean   : 4.033  
##  3rd Qu.: 4.610  
##  Max.   :13.180  
##  NA's   :2544
# Summary of the price column alone; the NA's entry counts missing prices.
summary(BigDiamonds$price)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     300    1220    3503    8753   11170   99990     713
# Column names of the table.
names(BigDiamonds)
##  [1] "V1"           "carat"        "cut"          "color"       
##  [5] "clarity"      "table"        "depth"        "cert"        
##  [9] "measurements" "price"        "x"            "y"           
## [13] "z"
# Average carat size

mean(BigDiamonds$carat)
## [1] 1.071297
# price contains missing values, so the plain mean is NA.
mean(BigDiamonds$price)
## [1] NA
mean(BigDiamonds$price, na.rm = TRUE)  # ignore missing
## [1] 8753.018
length(BigDiamonds$price)
## [1] 598024
# na.omit() drops a row when ANY column is NA, not just price: 4240 rows
# are removed here although only 713 prices are missing (see table below).
BigDiamonds2 <- na.omit(BigDiamonds)  # delete missing
dim(BigDiamonds2)
## [1] 593784     13
# Count of non-missing (FALSE) versus missing (TRUE) prices.
table(is.na(BigDiamonds$price))
## 
##  FALSE   TRUE 
## 597311    713
library(magrittr)


# Pie chart of missing vs. non-missing prices. Each step's result is piped
# in as the first argument of the next call.
BigDiamonds$price %>%
  is.na() %>%
  table() %>%
  pie()

library(Hmisc)
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units

# Overall mean carat on the NA-free copy of the data.
mean(BigDiamonds2$carat)
## [1] 1.072593
# Group means with Hmisc::summarize(X, by, FUN). The call is namespace-
# qualified because dplyr, attached later in this script, masks summarize();
# without the prefix, re-running these lines in the same session would
# dispatch to dplyr's function instead.
Hmisc::summarize(BigDiamonds2$carat, BigDiamonds2$color, mean)
##   BigDiamonds2$color BigDiamonds2$carat
## 1                  D          0.8274407
## 2                  E          0.8331336
## 3                  F          0.9424257
## 4                  G          1.0658902
## 5                  H          1.2106406
## 6                  I          1.2720231
## 7                  J          1.3484265
## 8                  K          1.4984115
## 9                  L          1.3632803
Hmisc::summarize(BigDiamonds2$price, BigDiamonds2$cut, mean)
##   BigDiamonds2$cut BigDiamonds2$price
## 1             Good           5256.226
## 2            Ideal           9924.824
## 3           V.Good           7430.927
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
# Same per-cut average price, expressed as a SQL GROUP BY query that sqldf
# runs against the BigDiamonds2 data frame.
sqldf("select cut,avg(price) from BigDiamonds2 group by cut")
## Loading required package: tcltk
##      cut avg(price)
## 1   Good   5256.226
## 2  Ideal   9924.824
## 3 V.Good   7430.927
# data.table form DT[i, j, by]: mean price per cut on the full table,
# skipping only the missing prices. The figures differ slightly from the
# BigDiamonds2 results because na.omit() removed additional rows there.
BigDiamonds[, mean(price, na.rm = TRUE), by = cut]
##       cut       V1
## 1: V.Good 7430.527
## 2:   Good 5254.792
## 3:  Ideal 9919.277
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:Hmisc':
## 
##     combine, src, summarize
## 
## The following objects are masked from 'package:data.table':
## 
##     between, last
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
 # Per-cut mean price with dplyr. The verbs are namespace-qualified because
 # both Hmisc and dplyr export summarize(); the result column is labelled
 # with the summary expression itself.
 dplyr::summarize(
   dplyr::group_by(BigDiamonds, cut),
   mean(price, na.rm = TRUE)
 )
## Source: local data table [3 x 2]
## 
##      cut mean(price, na.rm = TRUE)
## 1 V.Good                  7430.527
## 2   Good                  5254.792
## 3  Ideal                  9919.277