dirty_iris <- read.csv("https://raw.githubusercontent.com/edwindj/datacleaning/master/data/dirty_iris.csv")

Question 3

sum(is.na(dirty_iris$Petal.Length))
## [1] 19

Question 4

dirty_iris_complete <- na.omit(dirty_iris)
rowscomplete <- nrow(dirty_iris_complete)
rowsoriginal <- nrow(dirty_iris)
rowsfinal <- rowscomplete/rowsoriginal
rowsfinal
## [1] 0.64

Question 5

summary(dirty_iris)
##   Sepal.Length     Sepal.Width      Petal.Length    Petal.Width 
##  Min.   : 0.000   Min.   :-3.000   Min.   : 0.00   Min.   :0.1  
##  1st Qu.: 5.100   1st Qu.: 2.800   1st Qu.: 1.60   1st Qu.:0.3  
##  Median : 5.750   Median : 3.000   Median : 4.50   Median :1.3  
##  Mean   : 6.559   Mean   : 3.391   Mean   : 4.45   Mean   :Inf  
##  3rd Qu.: 6.400   3rd Qu.: 3.300   3rd Qu.: 5.10   3rd Qu.:1.8  
##  Max.   :73.000   Max.   :30.000   Max.   :63.00   Max.   :Inf  
##  NA's   :10       NA's   :17       NA's   :19      NA's   :12   
##    Species         
##  Length:150        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 

Question 6

dirty_iris[is.na(dirty_iris)] <- "Missing"

Question 7

rules_violate <- subset(dirty_iris,c((Sepal.Width<=0)|(Sepal.Length>30)))
nrow(rules_violate)
## [1] 149

Question 8

problems <- which(dirty_iris$Sepal.Width < 0)
dirty_iris[problems, ]
##    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 16            5          -3          3.5           1 versicolor
dirty_iris$Sepal.Width[dirty_iris$Sepal.Width == "-3"] <- "3"
summary(dirty_iris)
##  Sepal.Length       Sepal.Width        Petal.Length       Petal.Width       
##  Length:150         Length:150         Length:150         Length:150        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##    Species         
##  Length:150        
##  Class :character  
##  Mode  :character

Sepal.Width mean

dirty_iris_mean <- dirty_iris$Sepal.Width[is.na(dirty_iris$Sepal.Width)] <- mean(dirty_iris$Sepal.Width, na.rm=TRUE) 
## Warning in mean.default(dirty_iris$Sepal.Width, na.rm = TRUE): argument is not
## numeric or logical: returning NA
dirty_iris_mean
## [1] NA

Petal.Length median

dirty_iris_median <- dirty_iris$Petal.Length[is.na(dirty_iris$Petal.Length)] <- median(dirty_iris$Petal.Length, na.rm=TRUE) 
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]):
## argument is not numeric or logical: returning NA
dirty_iris_median
## [1] NA

Petal.Width kNN

library(VIM)

dirty_iris1 <- kNN(dirty_iris)
## Warning in kNN(dirty_iris): Nothing to impute, because no NA are present (also
## after using makeNA)