# Install tidyverse only when it is missing, so re-running the script does not
# download and reinstall the package every time. The download uses HTTPS
# instead of plain HTTP.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse", repos = "https://cloud.r-project.org")
}
library(tidyverse)
# Open the help page for the diamonds data set.
?diamonds
# Per-column summary of the data set.
summary(diamonds)
## carat cut color clarity
## Min. :0.2000 Fair : 1610 D: 6775 SI1 :13065
## 1st Qu.:0.4000 Good : 4906 E: 9797 VS2 :12258
## Median :0.7000 Very Good:12082 F: 9542 SI2 : 9194
## Mean :0.7979 Premium :13791 G:11292 VS1 : 8171
## 3rd Qu.:1.0400 Ideal :21551 H: 8304 VVS2 : 5066
## Max. :5.0100 I: 5422 VVS1 : 3655
## J: 2808 (Other): 2531
## depth table price x
## Min. :43.00 Min. :43.00 Min. : 326 Min. : 0.000
## 1st Qu.:61.00 1st Qu.:56.00 1st Qu.: 950 1st Qu.: 4.710
## Median :61.80 Median :57.00 Median : 2401 Median : 5.700
## Mean :61.75 Mean :57.46 Mean : 3933 Mean : 5.731
## 3rd Qu.:62.50 3rd Qu.:59.00 3rd Qu.: 5324 3rd Qu.: 6.540
## Max. :79.00 Max. :95.00 Max. :18823 Max. :10.740
##
## y z
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 4.720 1st Qu.: 2.910
## Median : 5.710 Median : 3.530
## Mean : 5.735 Mean : 3.539
## 3rd Qu.: 6.540 3rd Qu.: 4.040
## Max. :58.900 Max. :31.800
##
# NOTE(review): attach() is kept only because statements further down may
# still reference bare column names (carat, price, depth); it should be
# dropped once nothing depends on the search path any more.
attach(diamonds)
# Distinct values and level ordering of each categorical column. The columns
# are referenced through `diamonds$` explicitly so the lookup does not depend
# on the search path (note that `cut` is also the name of a base R function).
unique(diamonds$clarity)
## [1] SI2 SI1 VS1 VS2 VVS2 VVS1 I1 IF
## Levels: I1 < SI2 < SI1 < VS2 < VS1 < VVS2 < VVS1 < IF
unique(diamonds$color)
## [1] E I J H F G D
## Levels: D < E < F < G < H < I < J
unique(diamonds$cut)
## [1] Ideal Premium Good Very Good Fair
## Levels: Fair < Good < Very Good < Premium < Ideal
# Descriptive statistics for carat and price, collected into a 2 x 6 matrix
# (one row per variable, one column per statistic, rounded to 2 decimals).
# Columns are read from `diamonds` explicitly rather than through attach(),
# so the values cannot be changed by whatever else is on the search path.

# Carat statistics.
Mean_carat <- mean(diamonds$carat)
Median_carat <- median(diamonds$carat)
Variance_carat <- var(diamonds$carat)
StdDev_carat <- sd(diamonds$carat)
Min_carat <- min(diamonds$carat)
Max_carat <- max(diamonds$carat)
Carat_stats <- c(
  Mean_carat, Median_carat, Variance_carat,
  StdDev_carat, Min_carat, Max_carat
)

# Price statistics, in the same order as the carat statistics.
Mean_price <- mean(diamonds$price)
Median_price <- median(diamonds$price)
Variance_price <- var(diamonds$price)
StdDev_price <- sd(diamonds$price)
Min_price <- min(diamonds$price)
Max_price <- max(diamonds$price)
price_stats <- c(
  Mean_price, Median_price, Variance_price,
  StdDev_price, Min_price, Max_price
)

# Carat values first, then price values; byrow = TRUE below turns each run of
# six values into one row of the matrix.
Stat <- c(Carat_stats, price_stats)
Stats <- round(Stat, 2)
Statistics <- matrix(
  Stats,
  nrow = 2,
  byrow = TRUE,
  dimnames = list(
    c("Carat", "Price"),
    c("Mean", "Median", "Variance", "Standard_Deviation", "Min", "Max")
  )
)
Statistics
## Mean Median Variance Standard_Deviation Min Max
## Carat 0.8 0.7 0.22 0.47 0.2 5.01
## Price 3932.8 2401.0 15915629.42 3989.44 326.0 18823.00
# Covariance, correlation and scatter plot of price against carat.
# with() evaluates each call inside `diamonds`, so the columns are found
# without relying on attach(); the plot's axis labels stay "carat" and "price".
with(diamonds, cov(carat, price))
## [1] 1742.765
with(diamonds, cor(carat, price))
## [1] 0.9215913
with(diamonds, plot(carat, price))
Because the covariance of carat and price is positive and their correlation (0.92) is close to +1, the two variables exhibit a strong positive linear relationship.
# Covariance, correlation and scatter plot of price against depth.
# with() evaluates each call inside `diamonds`, so the columns are found
# without relying on attach(); the plot's axis labels stay "depth" and "price".
with(diamonds, cov(depth, price))
## [1] -60.85371
with(diamonds, cor(depth, price))
## [1] -0.0106474
with(diamonds, plot(depth, price))
Although the covariance of depth and price is negative, their correlation (-0.01) is almost equal to 0, so the two variables show essentially no linear relationship.
library(ggplot2)
# Scatter plot of price against carat, one point per diamond, coloured by cut.
ggplot(diamonds, aes(x = carat, y = price, colour = cut)) +
  geom_point()
Carat and price exhibit a positive, roughly linear relationship: as a diamond's carat weight increases, its price also increases. In addition, at the same carat weight, most Ideal-cut diamonds are costlier than Fair-cut ones.
# Price against carat with a single smoothed trend line over all diamonds.
# The colour mapping is set on the point layer only, so the smoother is fitted
# to the whole data set rather than once per cut.
diamonds %>%
  ggplot(aes(x = carat, y = price)) +
  geom_point(aes(colour = cut)) +
  geom_smooth()
# Number of diamonds in each color grade.
count(diamonds, color)
## # A tibble: 7 x 2
## color n
## <ord> <int>
## 1 D 6775
## 2 E 9797
## 3 F 9542
## 4 G 11292
## 5 H 8304
## 6 I 5422
## 7 J 2808
# Box plot of price for each clarity grade, flipped so that the clarity
# grades run along the vertical axis.
diamonds %>%
  ggplot(aes(x = clarity, y = price)) +
  geom_boxplot() +
  coord_flip()
There are many outliers, especially for the better-clarity diamonds, and the median price (the line inside each box) for every clarity category is very low.
# Price against carat, drawn as one panel per clarity grade, with the panels
# laid out in three rows.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point() +
  facet_wrap(~ clarity, nrow = 3)
The points are densest at the lower end of each clarity panel, which is why the typical price is low; however, that does not rule out higher-priced diamonds with better clarity, and these are the ones that appear as outliers in the boxplot.
# Subset the script labels "Best": clarity IF, color D and cut Ideal.
Best <- filter(diamonds, clarity == "IF", color == "D", cut == "Ideal")
# Plot price against carat for this subset. with() looks the columns up in
# `Best` directly, instead of attach()-ing a second data frame that would mask
# the identically named columns of `diamonds` on the search path.
with(Best, plot(carat, price))
# Subset the script labels "Worst": clarity I1, color J and cut Fair.
Worst <- filter(diamonds, clarity == "I1", color == "J", cut == "Fair")
# Plot price against carat for this subset. with() looks the columns up in
# `Worst` directly, instead of attach()-ing another data frame that would mask
# identically named columns already on the search path.
with(Worst, plot(carat, price))
An interesting observation is that the 23 Worst diamonds show a continuous, roughly linear relationship between carat and price, while the 28 Best diamonds show a discrete relationship between carat and price.