dirty_iris <- read.csv("https://raw.githubusercontent.com/edwindj/datacleaning/master/data/dirty_iris.csv")
Question 3
sum(is.na(dirty_iris$Petal.Length))
## [1] 19
Question 4
dirty_iris_complete <- na.omit(dirty_iris)
rowscomplete <- nrow(dirty_iris_complete)
rowsoriginal <- nrow(dirty_iris)
rowsfinal <- rowscomplete/rowsoriginal
rowsfinal
## [1] 0.64
Question 5
summary(dirty_iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. : 0.000 Min. :-3.000 Min. : 0.00 Min. :0.1
## 1st Qu.: 5.100 1st Qu.: 2.800 1st Qu.: 1.60 1st Qu.:0.3
## Median : 5.750 Median : 3.000 Median : 4.50 Median :1.3
## Mean : 6.559 Mean : 3.391 Mean : 4.45 Mean :Inf
## 3rd Qu.: 6.400 3rd Qu.: 3.300 3rd Qu.: 5.10 3rd Qu.:1.8
## Max. :73.000 Max. :30.000 Max. :63.00 Max. :Inf
## NA's :10 NA's :17 NA's :19 NA's :12
## Species
## Length:150
## Class :character
## Mode :character
##
##
##
##
Question 6
dirty_iris[is.na(dirty_iris)] <- "Missing"
Question 7
rules_violate <- subset(dirty_iris,c((Sepal.Width<=0)|(Sepal.Length>30)))
nrow(rules_violate)
## [1] 149
Question 8
problems <- which(dirty_iris$Sepal.Width < 0)
dirty_iris[problems, ]
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 16 5 -3 3.5 1 versicolor
dirty_iris$Sepal.Width[dirty_iris$Sepal.Width == "-3"] <- "3"
summary(dirty_iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Length:150 Length:150 Length:150 Length:150
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Species
## Length:150
## Class :character
## Mode :character
Sepal.Width mean
dirty_iris_mean <- dirty_iris$Sepal.Width[is.na(dirty_iris$Sepal.Width)] <- mean(dirty_iris$Sepal.Width, na.rm=TRUE)
## Warning in mean.default(dirty_iris$Sepal.Width, na.rm = TRUE): argument is not
## numeric or logical: returning NA
dirty_iris_mean
## [1] NA
Petal.Width kNN
library(VIM)
dirty_iris1 <- kNN(dirty_iris)
## Warning in kNN(dirty_iris): Nothing to impute, because no NA are present (also
## after using makeNA)