# Load the deliberately messy iris data set straight from its GitHub location.
dirty_iris <- read.csv(
  file = "https://raw.githubusercontent.com/edwindj/datacleaning/master/data/dirty_iris.csv"
)

# Question 3

# How many Petal.Length observations are missing?
sum(is.na(dirty_iris[["Petal.Length"]]))
## [1] 19

# Question 4

# Number of rows with no missing value in any column
nrow(na.omit(dirty_iris))
## [1] 96
# The same figure as a percentage of all rows
100 * mean(complete.cases(dirty_iris))
## [1] 64

# Question 5

# Per-column count of NaN entries. vapply() builds the same rows-by-columns
# logical matrix as sapply(), with the expected column length stated up front.
colSums(vapply(dirty_iris, is.nan, logical(nrow(dirty_iris))))
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##            0            0            0            0            0
# Per-column count of infinite entries, computed the same way.
colSums(vapply(dirty_iris, is.infinite, logical(nrow(dirty_iris))))
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##            0            0            0            1            0

# Question 6

# Tabulate Petal.Width to inspect its distinct values; the non-finite entry
# shows up as its own "Inf" category.
table(dirty_iris$Petal.Width)
## 
## 0.1 0.2 0.3 0.4 0.5 0.6   1 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9   2 2.1 2.2 2.3 
##   5  26   6   6   1   1   6   3   4  13   8  12   4   1   8   4   7   6   2   8 
## 2.4 2.5 Inf 
##   3   3   1
# Locate the infinite entry by testing the numeric value itself instead of
# comparing against the string "Inf".
which(is.infinite(dirty_iris$Petal.Width))
## [1] 86
# Replace it with a real missing value. Assigning the string "NA" would coerce
# the whole column to character, and is.na() would not count that entry.
dirty_iris$Petal.Width[is.infinite(dirty_iris$Petal.Width)] <- NA
# table() leaves out missing values by default, so the former Inf category is
# simply gone.
# NOTE: output recorded further down in this file was produced while the
# column was character (the `<NA>` printed under Question 7 and the
# Petal.Width count of 12 under Question 9); on a re-run expect `NA` and 13.
table(dirty_iris$Petal.Width)
## 
## 0.1 0.2 0.3 0.4 0.5 0.6   1 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9   2 2.1 2.2 2.3 
##   5  26   6   6   1   1   6   3   4  13   8  12   4   1   8   4   7   6   2   8 
## 2.4 2.5 
##   3   3

# Question 7

# Rows that break either rule: sepal width must be positive, and sepal length
# must not exceed 30. which() keeps only rows where the test is TRUE, so rows
# whose test evaluates to NA are left out.
violations <- dirty_iris[
  which(dirty_iris$Sepal.Width <= 0 | dirty_iris$Sepal.Length > 30),
]

violations
##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 16           5.0          -3          3.5           1 versicolor
## 28          73.0          29         63.0        <NA>  virginica
## 125         49.0          30         14.0           2     setosa
## 130          5.7           0          1.7         0.3     setosa
nrow(violations)
## [1] 4

# Question 8

# Negative sepal widths are treated as sign errors and flipped to positive.
# which() returns only the positions where the comparison is TRUE, so missing
# widths never end up in the index used for the assignment.
neg_values <- which(dirty_iris$Sepal.Width < 0)
dirty_iris$Sepal.Width[neg_values] <- -dirty_iris$Sepal.Width[neg_values]
print(dirty_iris[["Sepal.Width"]])
##   [1]  3.2  3.3   NA  3.4  2.6   NA  2.7  3.0  2.7  3.1  3.5  2.7  3.0  2.8  3.9
##  [16]  3.0   NA  3.2  4.0   NA  3.6   NA  2.8  3.3  3.0  3.2  3.1 29.0  3.2  2.8
##  [31]  3.2  3.2  2.8  2.9  2.9  3.0  3.0  2.2  2.5  3.0   NA  2.7   NA  2.7  4.2
##  [46]  2.8   NA  3.2  3.0  3.4  2.6  3.1  2.7  3.4  3.3  3.8  3.8  2.9  2.8  2.8
##  [61]  2.3  2.8  3.0  3.3  3.0  2.5  2.5  3.2  3.5  3.5  3.0  3.1  3.5   NA  2.8
##  [76]  2.5  3.5  3.0  3.8  3.8  2.6  3.4  2.9  3.7  3.0  3.8  2.9  2.9  2.9  2.5
##  [91]  3.2   NA  3.4  2.7  2.2  3.1  2.3   NA  3.0  2.8  3.4  3.6  2.7  3.0  3.7
## [106]   NA  3.0  3.0  2.8  3.4  3.4  3.4  3.4  3.3  3.1  2.6   NA  3.1  3.0  2.8
## [121]  3.0  2.3  3.2  4.1 30.0  2.9  3.2   NA  3.6  0.0  2.5  3.1   NA  3.3  3.0
## [136]  3.0  3.2  3.0  3.1  2.2   NA   NA  3.0  2.9  2.5  3.1  3.0  3.5  3.1  2.6
# A sepal width of exactly 0 is recorded as missing instead.
# which() keeps the NA comparisons out of the subscript.
dirty_iris$Sepal.Width[which(dirty_iris$Sepal.Width == 0)] <- NA
print(dirty_iris[["Sepal.Width"]])
##   [1]  3.2  3.3   NA  3.4  2.6   NA  2.7  3.0  2.7  3.1  3.5  2.7  3.0  2.8  3.9
##  [16]  3.0   NA  3.2  4.0   NA  3.6   NA  2.8  3.3  3.0  3.2  3.1 29.0  3.2  2.8
##  [31]  3.2  3.2  2.8  2.9  2.9  3.0  3.0  2.2  2.5  3.0   NA  2.7   NA  2.7  4.2
##  [46]  2.8   NA  3.2  3.0  3.4  2.6  3.1  2.7  3.4  3.3  3.8  3.8  2.9  2.8  2.8
##  [61]  2.3  2.8  3.0  3.3  3.0  2.5  2.5  3.2  3.5  3.5  3.0  3.1  3.5   NA  2.8
##  [76]  2.5  3.5  3.0  3.8  3.8  2.6  3.4  2.9  3.7  3.0  3.8  2.9  2.9  2.9  2.5
##  [91]  3.2   NA  3.4  2.7  2.2  3.1  2.3   NA  3.0  2.8  3.4  3.6  2.7  3.0  3.7
## [106]   NA  3.0  3.0  2.8  3.4  3.4  3.4  3.4  3.3  3.1  2.6   NA  3.1  3.0  2.8
## [121]  3.0  2.3  3.2  4.1 30.0  2.9  3.2   NA  3.6   NA  2.5  3.1   NA  3.3  3.0
## [136]  3.0  3.2  3.0  3.1  2.2   NA   NA  3.0  2.9  2.5  3.1  3.0  3.5  3.1  2.6

# Question 9

# Sepal.Width: fill missing entries with the mean of the observed values.
dirty_iris$Sepal.Width <- replace(
  dirty_iris$Sepal.Width,
  is.na(dirty_iris$Sepal.Width),
  mean(dirty_iris$Sepal.Width, na.rm = TRUE)
)
# Petal.Length: fill missing entries with the median of the observed values.
dirty_iris$Petal.Length <- replace(
  dirty_iris$Petal.Length,
  is.na(dirty_iris$Petal.Length),
  median(dirty_iris$Petal.Length, na.rm = TRUE)
)
# Remaining missing values per column after imputation
colSums(is.na(dirty_iris))
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##           10            0            0           12            0