#forgot my code
# List the objects in the workspace; it is empty at this point. One call is
# enough: repeating it with nothing in between returns the same result.
ls()
## character(0)
# Build an integer sequence and echo it.
a <- 1:10
a
## [1] 1 2 3 4 5 6 7 8 9 10
# Store a string under a different name; `a` itself is left untouched.
ac <- "Hello World"
a
## [1] 1 2 3 4 5 6 7 8 9 10
getwd()
## [1] "C:/Users/ajaohri/Documents/R/Tutorials"
# Two equal-length integer vectors to plot against each other.
a <- 1:10
b <- 16:25

# Default plot of b against a.
plot(a, b)

# Same data drawn with type "l" (lines).
plot(a, b, type = "l")
getwd()
## [1] "C:/Users/ajaohri/Documents/R/Tutorials"
# List the Desktop folder directly instead of changing the working
# directory first: the file paths used further down are absolute, so the
# session-wide side effect of setwd() is not needed. The path uses forward
# slashes throughout rather than a mix of "\\" and "/" separators.
dir("C:/Users/ajaohri/Desktop")
## [1] "Ajay Ohri Sapient Resume.docx"
## [2] "all"
## [3] "Anaconda3 (64-bit) - Shortcut.lnk"
## [4] "BOOKS"
## [5] "churn"
## [6] "desktop.ini"
## [7] "Docker for Windows.lnk"
## [8] "Enron"
## [9] "FnF_01_01209_Ajay Ohri.pdf"
## [10] "FnF_01_01209_Ajay Ohri.zip"
## [11] "Freeware (gursoftbank) - Shortcut.lnk"
## [12] "HIRING PROCESS"
## [13] "mentoring"
## [14] "model engineering"
## [15] "mortdefault"
## [16] "PERSONAL"
## [17] "POCs"
## [18] "Telecom Churn Using Multiple Machine Learning Models (1).html"
## [19] "Telecom Churn Using Multiple Machine Learning Models (1).ipynb"
## [20] "Telecom Churn Using Multiple Machine Learning Models.ipynb"
## [21] "Telecom Churn Using Multiple Machine Learning Models.r"
## [22] "Telecom Churn Using Multiple Machine Learning Models.slides.html"
## [23] "Tutorial_2.docx"
## [24] "TUTORIALS"
ls()
## [1] "a" "ac" "b"
# Remove exactly the objects created above (see the ls() listing).
# rm(list = ls()) would wipe every object in the workspace, including ones
# this script did not create, and made the separate rm(b) redundant.
rm(a, ac, b)
library(Rcmdr)
## Loading required package: splines
## Loading required package: RcmdrMisc
## Loading required package: car
## Loading required package: carData
## Loading required package: sandwich
## Loading required package: effects
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## The Commander GUI is launched only in interactive sessions
##
## Attaching package: 'Rcmdr'
## The following object is masked from 'package:car':
##
## Confint

# Rcmdr is already attached by the library(Rcmdr) call above, so it is not
# loaded a second time from a hard-coded, R-version-specific library path.
#
# Read the semicolon-separated red wine quality file: the first row holds
# the column names, "." is the decimal mark, and surrounding white space is
# stripped from fields.
Dataset <- read.table(
  "C:/Users/ajaohri/Desktop/HIRING PROCESS/coding exercise/winequality-red.csv",
  header = TRUE, sep = ";", na.strings = "NA", dec = ".", strip.white = TRUE
)
names(Dataset)
## [1] "fixed.acidity" "volatile.acidity" "citric.acid"
## [4] "residual.sugar" "chlorides" "free.sulfur.dioxide"
## [7] "total.sulfur.dioxide" "density" "pH"
## [10] "sulphates" "alcohol" "quality"
summary(Dataset)
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 4.60 Min. :0.1200 Min. :0.000 Min. : 0.900
## 1st Qu.: 7.10 1st Qu.:0.3900 1st Qu.:0.090 1st Qu.: 1.900
## Median : 7.90 Median :0.5200 Median :0.260 Median : 2.200
## Mean : 8.32 Mean :0.5278 Mean :0.271 Mean : 2.539
## 3rd Qu.: 9.20 3rd Qu.:0.6400 3rd Qu.:0.420 3rd Qu.: 2.600
## Max. :15.90 Max. :1.5800 Max. :1.000 Max. :15.500
## chlorides free.sulfur.dioxide total.sulfur.dioxide
## Min. :0.01200 Min. : 1.00 Min. : 6.00
## 1st Qu.:0.07000 1st Qu.: 7.00 1st Qu.: 22.00
## Median :0.07900 Median :14.00 Median : 38.00
## Mean :0.08747 Mean :15.87 Mean : 46.47
## 3rd Qu.:0.09000 3rd Qu.:21.00 3rd Qu.: 62.00
## Max. :0.61100 Max. :72.00 Max. :289.00
## density pH sulphates alcohol
## Min. :0.9901 Min. :2.740 Min. :0.3300 Min. : 8.40
## 1st Qu.:0.9956 1st Qu.:3.210 1st Qu.:0.5500 1st Qu.: 9.50
## Median :0.9968 Median :3.310 Median :0.6200 Median :10.20
## Mean :0.9967 Mean :3.311 Mean :0.6581 Mean :10.42
## 3rd Qu.:0.9978 3rd Qu.:3.400 3rd Qu.:0.7300 3rd Qu.:11.10
## Max. :1.0037 Max. :4.010 Max. :2.0000 Max. :14.90
## quality
## Min. :3.000
## 1st Qu.:5.000
## Median :6.000
## Mean :5.636
## 3rd Qu.:6.000
## Max. :8.000
# Columns are addressed by name rather than by position (12, 6) so these
# calls keep working if the column order of the file ever changes.

# Frequency of each quality score, using `$` extraction.
table(Dataset$quality)
##
## 3 4 5 6 7 8
## 10 53 681 638 199 18
# Same counts via single-bracket selection (a one-column data frame).
table(Dataset["quality"])
##
## 3 4 5 6 7 8
## 10 53 681 638 199 18
# Same counts via matrix-style selection (a plain vector).
table(Dataset[, "quality"])
##
## 3 4 5 6 7 8
## 10 53 681 638 199 18
# Frequency of each free.sulfur.dioxide value.
table(Dataset[, "free.sulfur.dioxide"])
##
## 1 2 3 4 5 5.5 6 7 8 9 10 11 12 13 14
## 3 1 49 41 104 1 138 71 56 62 79 59 75 57 50
## 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
## 78 61 60 46 39 30 41 22 32 34 24 32 29 23 23
## 30 31 32 33 34 35 36 37 37.5 38 39 40 40.5 41 42
## 16 20 22 11 18 15 11 3 2 9 5 6 1 7 3
## 43 45 46 47 48 50 51 52 53 54 55 57 66 68 72
## 3 3 1 1 4 2 4 3 1 1 2 1 1 2 1
# Distinct free.sulfur.dioxide values, in order of first appearance.
unique(Dataset[, "free.sulfur.dioxide"])
## [1] 11.0 25.0 15.0 17.0 13.0 9.0 16.0 52.0 51.0 35.0 6.0 29.0 23.0 10.0
## [15] 21.0 4.0 14.0 8.0 22.0 40.0 5.0 3.0 7.0 12.0 30.0 33.0 50.0 19.0
## [29] 20.0 27.0 18.0 28.0 34.0 42.0 41.0 37.0 32.0 36.0 24.0 26.0 39.0 40.5
## [43] 68.0 31.0 38.0 43.0 47.0 1.0 54.0 46.0 45.0 2.0 5.5 53.0 37.5 57.0
## [57] 48.0 72.0 55.0 66.0
# Cross-tabulate free sulfur dioxide (rows) against quality (columns).
table(Dataset$free.sulfur.dioxide, Dataset$quality)
##
## 3 4 5 6 7 8
## 1 0 0 0 3 0 0
## 2 0 0 0 1 0 0
## 3 1 3 16 17 11 1
## 4 0 5 18 11 7 0
## 5 3 5 46 36 11 3
## 5.5 0 0 0 1 0 0
## 6 2 7 35 64 26 4
## 7 0 3 30 26 11 1
## 8 0 1 23 30 1 1
## 9 0 2 30 19 11 0
## 10 1 0 39 22 17 0
## 11 0 5 20 26 8 0
## 12 0 3 37 22 12 1
## 13 0 1 20 25 11 0
## 14 0 3 16 28 3 0
## 15 0 2 37 32 5 2
## 16 1 0 29 22 9 0
## 17 0 3 21 30 5 1
## 18 0 0 18 24 4 0
## 19 0 1 15 19 3 1
## 20 1 0 16 11 2 0
## 21 0 0 23 13 5 0
## 22 0 1 7 12 2 0
## 23 0 1 15 12 4 0
## 24 0 0 13 16 5 0
## 25 0 0 14 8 2 0
## 26 0 2 11 18 1 0
## 27 0 1 15 13 0 0
## 28 0 1 14 7 0 1
## 29 0 0 9 12 2 0
## 30 0 0 9 6 1 0
## 31 0 0 10 7 3 0
## 32 0 1 12 7 2 0
## 33 0 0 5 6 0 0
## 34 1 0 11 5 0 1
## 35 0 0 10 1 4 0
## 36 0 1 6 2 2 0
## 37 0 0 2 0 1 0
## 37.5 0 0 0 0 2 0
## 38 0 0 3 4 2 0
## 39 0 0 3 2 0 0
## 40 0 0 3 3 0 0
## 40.5 0 0 0 1 0 0
## 41 0 1 1 5 0 0
## 42 0 0 1 1 0 1
## 43 0 0 2 1 0 0
## 45 0 0 1 0 2 0
## 46 0 0 1 0 0 0
## 47 0 0 1 0 0 0
## 48 0 0 3 1 0 0
## 50 0 0 1 1 0 0
## 51 0 0 3 1 0 0
## 52 0 0 2 1 0 0
## 53 0 0 0 0 1 0
## 54 0 0 0 0 1 0
## 55 0 0 0 2 0 0
## 57 0 0 1 0 0 0
## 66 0 0 1 0 0 0
## 68 0 0 2 0 0 0
## 72 0 0 0 1 0 0
# summary() on the two-way table reports a chi-squared test of independence
# between the two variables.
summary(table(Dataset$free.sulfur.dioxide, Dataset$quality))
## Number of cases in table: 1599
## Number of factors: 2
## Test for independence of all factors:
## Chisq = 311.81, df = 295, p-value = 0.2399
## Chi-squared approximation may be incorrect
# One bar per observation of fixed acidity.
barplot(Dataset$fixed.acidity)

# Box plot of the same variable.
boxplot(Dataset$fixed.acidity)

# Share of wines at each quality score.
pie(table(Dataset$quality))

# Histogram of the (still numeric) quality score.
hist(Dataset$quality)

# Titled, coloured histogram of fixed acidity with the default binning ...
hist(Dataset$fixed.acidity, main = "My First Graph", col = rainbow(7))

# ... and again with breaks = 20.
hist(
  Dataset$fixed.acidity,
  main = "My First Graph",
  col = rainbow(7),
  breaks = 20
)

# Treat quality as a categorical variable from here on, then inspect the
# resulting column types.
Dataset$quality <- as.factor(Dataset$quality)
str(Dataset)
## 'data.frame': 1599 obs. of 12 variables:
## $ fixed.acidity : num 7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
## $ volatile.acidity : num 0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
## $ citric.acid : num 0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
## $ residual.sugar : num 1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
## $ chlorides : num 0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
## $ free.sulfur.dioxide : num 11 25 15 17 11 13 15 15 9 17 ...
## $ total.sulfur.dioxide: num 34 67 54 60 34 40 59 21 18 102 ...
## $ density : num 0.998 0.997 0.997 0.998 0.998 ...
## $ pH : num 3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
## $ sulphates : num 0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
## $ alcohol : num 9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
## $ quality : Factor w/ 6 levels "3","4","5","6",..: 3 3 3 4 3 3 3 5 5 3 ...
# Redraw the pie chart now that quality has been converted to a factor.
pie(table(Dataset$quality))

# Re-run the summary: quality is now reported as a count per level instead
# of as min/quartiles/mean/max.
summary(Dataset)
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 4.60 Min. :0.1200 Min. :0.000 Min. : 0.900
## 1st Qu.: 7.10 1st Qu.:0.3900 1st Qu.:0.090 1st Qu.: 1.900
## Median : 7.90 Median :0.5200 Median :0.260 Median : 2.200
## Mean : 8.32 Mean :0.5278 Mean :0.271 Mean : 2.539
## 3rd Qu.: 9.20 3rd Qu.:0.6400 3rd Qu.:0.420 3rd Qu.: 2.600
## Max. :15.90 Max. :1.5800 Max. :1.000 Max. :15.500
## chlorides free.sulfur.dioxide total.sulfur.dioxide
## Min. :0.01200 Min. : 1.00 Min. : 6.00
## 1st Qu.:0.07000 1st Qu.: 7.00 1st Qu.: 22.00
## Median :0.07900 Median :14.00 Median : 38.00
## Mean :0.08747 Mean :15.87 Mean : 46.47
## 3rd Qu.:0.09000 3rd Qu.:21.00 3rd Qu.: 62.00
## Max. :0.61100 Max. :72.00 Max. :289.00
## density pH sulphates alcohol quality
## Min. :0.9901 Min. :2.740 Min. :0.3300 Min. : 8.40 3: 10
## 1st Qu.:0.9956 1st Qu.:3.210 1st Qu.:0.5500 1st Qu.: 9.50 4: 53
## Median :0.9968 Median :3.310 Median :0.6200 Median :10.20 5:681
## Mean :0.9967 Mean :3.311 Mean :0.6581 Mean :10.42 6:638
## 3rd Qu.:0.9978 3rd Qu.:3.400 3rd Qu.:0.7300 3rd Qu.:11.10 7:199
## Max. :1.0037 Max. :4.010 Max. :2.0000 Max. :14.90 8: 18
#http://bit.ly/dsdata
#BigDiamonds
library(readr)
# Load the diamonds data. The first column of the file has no header, so
# readr fills in the name X1 (see the warning below).
BigDiamonds <- read_csv(
  "C:/Users/ajaohri/Desktop/HIRING PROCESS/coding exercise/BigDiamonds.csv"
)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_integer(),
## carat = col_double(),
## cut = col_character(),
## color = col_character(),
## clarity = col_character(),
## table = col_double(),
## depth = col_double(),
## cert = col_character(),
## measurements = col_character(),
## price = col_integer(),
## x = col_double(),
## y = col_double(),
## z = col_double()
## )
head(BigDiamonds)
## # A tibble: 6 x 13
## X1 carat cut color clarity table depth cert measurements price
## <int> <dbl> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <int>
## 1 1 0.25 V.Go~ K I1 59 63.7 GIA 3.96 x 3.95~ NA
## 2 2 0.23 Good G I1 61 58.1 GIA 4.00 x 4.05~ NA
## 3 3 0.34 Good J I2 58 58.7 GIA 4.56 x 4.53~ NA
## 4 4 0.21 V.Go~ D I1 60 60.6 GIA 3.80 x 3.82~ NA
## 5 5 0.31 V.Go~ K I1 59 62.2 EGL 4.35 x 4.26~ NA
## 6 6 0.2 Good G SI2 60 64.4 GIA 3.74 x 3.67~ NA
## # ... with 3 more variables: x <dbl>, y <dbl>, z <dbl>
# Turn the tibble returned by read_csv() into a base data frame and check
# its structure.
BigDiamonds <- data.frame(BigDiamonds)
str(BigDiamonds)
## 'data.frame': 598024 obs. of 13 variables:
## $ X1 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ carat : num 0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
## $ cut : chr "V.Good" "Good" "Good" "V.Good" ...
## $ color : chr "K" "G" "J" "D" ...
## $ clarity : chr "I1" "I1" "I2" "I1" ...
## $ table : num 59 61 58 60 59 60 63 61 57.5 65 ...
## $ depth : num 63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
## $ cert : chr "GIA" "GIA" "GIA" "GIA" ...
## $ measurements: chr "3.96 x 3.95 x 2.52" "4.00 x 4.05 x 2.30" "4.56 x 4.53 x 2.67" "3.80 x 3.82 x 2.31" ...
## $ price : int NA NA NA NA NA NA NA NA NA NA ...
## $ x : num 3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
## $ y : num 3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
## $ z : num 2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...
summary(BigDiamonds)
## X1 carat cut color
## Min. : 1 Min. :0.200 Length:598024 Length:598024
## 1st Qu.:149507 1st Qu.:0.500 Class :character Class :character
## Median :299013 Median :0.900 Mode :character Mode :character
## Mean :299013 Mean :1.071
## 3rd Qu.:448518 3rd Qu.:1.500
## Max. :598024 Max. :9.250
##
## clarity table depth cert
## Length:598024 Min. : 0.00 Min. : 0.00 Length:598024
## Class :character 1st Qu.:56.00 1st Qu.:61.00 Class :character
## Mode :character Median :58.00 Median :62.10 Mode :character
## Mean :57.63 Mean :61.06
## 3rd Qu.:59.00 3rd Qu.:62.70
## Max. :75.90 Max. :81.30
##
## measurements price x y
## Length:598024 Min. : 300 Min. : 0.150 Min. : 1.000
## Class :character 1st Qu.: 1220 1st Qu.: 4.740 1st Qu.: 4.970
## Mode :character Median : 3503 Median : 5.780 Median : 6.050
## Mean : 8753 Mean : 5.991 Mean : 6.199
## 3rd Qu.:11174 3rd Qu.: 6.970 3rd Qu.: 7.230
## Max. :99990 Max. :13.890 Max. :13.890
## NA's :713 NA's :1815 NA's :1852
## z
## Min. : 0.040
## 1st Qu.: 3.120
## Median : 3.860
## Mean : 4.033
## 3rd Qu.: 4.610
## Max. :13.180
## NA's :2544
# Check whether the data contains any missing value at all.
anyNA(BigDiamonds)
## [1] TRUE
# Drop every row that has at least one NA, then inspect what remains.
BigDiamonds <- na.omit(BigDiamonds)
str(BigDiamonds)
## 'data.frame': 593784 obs. of 13 variables:
## $ X1 : int 494 495 496 497 498 499 500 501 502 503 ...
## $ carat : num 0.24 0.31 0.26 0.24 0.3 0.34 0.2 0.29 0.22 0.25 ...
## $ cut : chr "V.Good" "V.Good" "Good" "Ideal" ...
## $ color : chr "G" "K" "J" "G" ...
## $ clarity : chr "SI1" "SI2" "VS2" "SI1" ...
## $ table : num 61 59 56.5 55 57 66 62 58 62 64 ...
## $ depth : num 58.9 60.2 64.1 61.3 62.2 55 59.1 61.4 59.6 60.5 ...
## $ cert : chr "GIA" "GIA" "IGI" "GIA" ...
## $ measurements: chr "4.09 x 4.10 x 2.41" "4.40 x 4.42 x 2.65" "4.01 x 4.05 x 2.58" "4.01 x 4.03 x 2.47" ...
## $ price : int 300 300 300 300 300 300 301 301 301 301 ...
## $ x : num 4.09 4.4 4.01 4.01 4.21 4.75 3.79 4.25 3.9 4.02 ...
## $ y : num 4.1 4.42 4.05 4.03 4.24 4.61 3.82 4.31 3.93 4.06 ...
## $ z : num 2.41 2.65 2.58 2.47 2.63 2.57 2.25 2.63 2.33 2.44 ...
## - attr(*, "na.action")= 'omit' Named int 1 2 3 4 5 6 7 8 9 10 ...
## ..- attr(*, "names")= chr "1" "2" "3" "4" ...
# Convert the categorical text columns to factors. They are selected by
# name instead of by position (3:5) so the conversion still hits the right
# columns if the column order changes. The BigDiamonds2 backup copy that
# used to be made here was removed again without ever being read, so it is
# no longer created.
BigDiamonds[c("cut", "color", "clarity")] <-
  lapply(BigDiamonds[c("cut", "color", "clarity")], as.factor)
str(BigDiamonds)
## 'data.frame': 593784 obs. of 13 variables:
## $ X1 : int 494 495 496 497 498 499 500 501 502 503 ...
## $ carat : num 0.24 0.31 0.26 0.24 0.3 0.34 0.2 0.29 0.22 0.25 ...
## $ cut : Factor w/ 3 levels "Good","Ideal",..: 3 3 1 2 1 1 3 2 2 3 ...
## $ color : Factor w/ 9 levels "D","E","F","G",..: 4 8 7 4 5 3 6 4 6 1 ...
## $ clarity : Factor w/ 9 levels "I1","I2","IF",..: 4 5 7 4 1 1 9 1 6 4 ...
## $ table : num 61 59 56.5 55 57 66 62 58 62 64 ...
## $ depth : num 58.9 60.2 64.1 61.3 62.2 55 59.1 61.4 59.6 60.5 ...
## $ cert : chr "GIA" "GIA" "IGI" "GIA" ...
## $ measurements: chr "4.09 x 4.10 x 2.41" "4.40 x 4.42 x 2.65" "4.01 x 4.05 x 2.58" "4.01 x 4.03 x 2.47" ...
## $ price : int 300 300 300 300 300 300 301 301 301 301 ...
## $ x : num 4.09 4.4 4.01 4.01 4.21 4.75 3.79 4.25 3.9 4.02 ...
## $ y : num 4.1 4.42 4.05 4.03 4.24 4.61 3.82 4.31 3.93 4.06 ...
## $ z : num 2.41 2.65 2.58 2.47 2.63 2.57 2.25 2.63 2.33 2.44 ...
## - attr(*, "na.action")= 'omit' Named int 1 2 3 4 5 6 7 8 9 10 ...
## ..- attr(*, "names")= chr "1" "2" "3" "4" ...
unique(BigDiamonds$cert)
## [1] "GIA" "IGI" "EGL USA" "EGL" "EGL Intl."
## [6] "AGS" "OTHER" "HRD" "EGL ISRAEL"
#unique(BigDiamonds$measurements)
# cert is categorical as well; convert it by name rather than by position
# (8) so the right column is converted even if the column order changes.
BigDiamonds["cert"] <- lapply(BigDiamonds["cert"], as.factor)
# Count the diamonds in every colour (rows) / cut (columns) combination.
table(BigDiamonds$color, BigDiamonds$cut)
##
## Good Ideal V.Good
## D 6566 45175 21460
## E 9623 55220 28016
## F 9042 57703 26027
## G 8804 61569 24990
## H 7542 55588 22821
## I 7339 42779 19761
## J 5316 29322 13840
## K 3449 14631 7580
## L 1468 5039 3114
library(data.table)
# Switch to a data.table to use its DT[i, j, by] syntax.
BigDiamonds <- as.data.table(BigDiamonds)
# Mean price of the diamonds above 4 carats, per colour.
BigDiamonds[carat > 4, mean(price), by = color]
## color V1
## 1: D 54892.52
## 2: E 55229.97
## 3: G 58653.00
## 4: K 55531.86
## 5: J 60801.32
## 6: I 65580.97
## 7: H 62766.09
## 8: F 60127.75
## 9: L 49344.77
# Mean price of the diamonds above 4 carats, per colour and cut.
BigDiamonds[carat > 4, mean(price), by = .(color, cut)]
## color cut V1
## 1: D Good 41765.17
## 2: E V.Good 48962.27
## 3: G Good 54954.66
## 4: K Ideal 56021.63
## 5: J Good 52148.42
## 6: I Ideal 67698.66
## 7: K V.Good 54158.94
## 8: I V.Good 63904.79
## 9: H Ideal 64726.88
## 10: E Good 38605.41
## 11: F Ideal 65238.88
## 12: K Good 58215.48
## 13: J Ideal 62742.25
## 14: H V.Good 57999.82
## 15: F V.Good 51123.08
## 16: G V.Good 57393.23
## 17: J V.Good 59458.64
## 18: I Good 55191.36
## 19: L V.Good 48798.36
## 20: G Ideal 60005.04
## 21: E Ideal 59862.82
## 22: L Ideal 51010.76
## 23: D V.Good 55993.31
## 24: F Good 58889.82
## 25: D Ideal 56046.47
## 26: L Good 45930.74
## 27: H Good 62125.52
## color cut V1
# Same grouping, with the number of rows in each group (.N) alongside the
# mean price.
BigDiamonds[carat > 4, .(mean(price), .N), by = .(color, cut)]
## color cut V1 N
## 1: D Good 41765.17 12
## 2: E V.Good 48962.27 51
## 3: G Good 54954.66 47
## 4: K Ideal 56021.63 535
## 5: J Good 52148.42 109
## 6: I Ideal 67698.66 748
## 7: K V.Good 54158.94 357
## 8: I V.Good 63904.79 288
## 9: H Ideal 64726.88 798
## 10: E Good 38605.41 17
## 11: F Ideal 65238.88 242
## 12: K Good 58215.48 85
## 13: J Ideal 62742.25 800
## 14: H V.Good 57999.82 317
## 15: F V.Good 51123.08 132
## 16: G V.Good 57393.23 228
## 17: J V.Good 59458.64 454
## 18: I Good 55191.36 106
## 19: L V.Good 48798.36 123
## 20: G Ideal 60005.04 341
## 21: E Ideal 59862.82 130
## 22: L Ideal 51010.76 151
## 23: D V.Good 55993.31 54
## 24: F Good 58889.82 39
## 25: D Ideal 56046.47 85
## 26: L Good 45930.74 54
## 27: H Good 62125.52 84
## color cut V1 N