# Load the raw diamond listings; the first line of the CSV supplies the
# column names.
data <- read.csv(file = "Diamond_price.csv", header = TRUE)

# Print a compact summary of each column's type and leading values.
str(data)
## 'data.frame': 1000 obs. of 10 variables:
## $ Shape : chr "Heart" "Heart" "Heart" "Heart" ...
## $ Carat : num 3.13 1.03 1.02 1.63 1.2 1.5 1.71 2.04 2.04 1.67 ...
## $ Cut : chr "Good" "Good" "Good" "Good" ...
## $ Color : chr "D" "H" "G" "K" ...
## $ Clarity : chr "SI2" "I1" "SI2" "SI2" ...
## $ Table : num 54 51 56 63 48.4 52 51.4 52 64.9 54.5 ...
## $ Depth : num 56.9 57.5 51.3 43 57.9 53 61.4 50.2 39.3 41.6 ...
## $ Cert : chr "AGS" "AGS" "AGS" "AGS" ...
## $ Measurements: chr "9.32 x 10.61 x 6.03" "6.22 x 7.03 x 4.04" "6.36 x 7.07 x 3.64" "7.83 x 8.28 x 3.56" ...
## $ Price : chr "$27,616" "$3,188" "$3,158" "$4,009" ...
# Strip the currency formatting from Price ("$" sign and thousands
# separators) in a single pass, e.g. "$27,616" -> "27616". Inside a character
# class "$" is literal, so no escaping is needed.
data$Price <- gsub("[$,]", "", data$Price)

# Keep only the columns used downstream. Selecting by name rather than by
# position keeps the selection correct if the CSV column order ever changes.
mydata <- data[, c(
  "Shape", "Carat", "Cut", "Color", "Clarity", "Depth", "Price"
)]

# gsub() always returns a character vector, so it can be parsed directly.
mydata$Price <- as.numeric(mydata$Price)

# Restrict to diamonds priced below 15000. which() discards NA comparisons;
# a bare logical index would instead insert an all-NA row for every Price
# that failed to parse.
mydata <- mydata[which(mydata$Price < 15000), ]
head(mydata)
## Shape Carat Cut Color Clarity Depth Price
## 2 Heart 1.03 Good H I1 57.5 3188
## 3 Heart 1.02 Good G SI2 51.3 3158
## 4 Heart 1.63 Good K SI2 43.0 4009
## 5 Heart 1.20 Ideal E SI2 57.9 5256
## 6 Heart 1.50 Ideal E SI2 53.0 7860
## 7 Heart 1.71 Ideal H SI2 61.4 8557