# Read the dirty iris CSV from GitHub into a data frame and summarise its
# Petal.Length column.
dirty_iris <- read.csv(
  file = "https://raw.githubusercontent.com/edwindj/datacleaning/master/data/dirty_iris.csv"
)
summary(dirty_iris[["Petal.Length"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    1.60    4.50    4.45    5.10   63.00      19
# Count the rows that have no NA in any column and express that count as a
# percentage of all rows in the data set.
complete_observations <- sum(complete.cases(dirty_iris))
# Divide by the actual row count rather than a hard-coded 150 so the
# percentage stays correct if the input file changes size.
percentage_complete <- (complete_observations / nrow(dirty_iris)) * 100

# Flag, for each numeric column, whether it holds any Inf/-Inf or NaN value.
# vapply() pins the result to one logical per column, and list-style `[`
# keeps a data frame even when only a single column is numeric (the matrix
# form `[, cols]` would collapse to a bare vector in that case).
numeric_columns <- vapply(dirty_iris, is.numeric, logical(1))
special_values <- vapply(
  dirty_iris[numeric_columns],
  function(x) any(is.infinite(x) | is.nan(x)),
  logical(1)
)
summary(dirty_iris)
##   Sepal.Length     Sepal.Width      Petal.Length    Petal.Width 
##  Min.   : 0.000   Min.   :-3.000   Min.   : 0.00   Min.   :0.1  
##  1st Qu.: 5.100   1st Qu.: 2.800   1st Qu.: 1.60   1st Qu.:0.3  
##  Median : 5.750   Median : 3.000   Median : 4.50   Median :1.3  
##  Mean   : 6.559   Mean   : 3.391   Mean   : 4.45   Mean   :Inf  
##  3rd Qu.: 6.400   3rd Qu.: 3.300   3rd Qu.: 5.10   3rd Qu.:1.8  
##  Max.   :73.000   Max.   :30.000   Max.   :63.00   Max.   :Inf  
##  NA's   :10       NA's   :17       NA's   :19      NA's   :12   
##    Species         
##  Length:150        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
# Mark the infinite Petal.Width entries, overwrite them with NA, and print the
# summary of the data frame again.
inf_indicator <- is.infinite(dirty_iris[["Petal.Width"]])
dirty_iris[["Petal.Width"]][inf_indicator] <- NA
summary(dirty_iris)
##   Sepal.Length     Sepal.Width      Petal.Length    Petal.Width   
##  Min.   : 0.000   Min.   :-3.000   Min.   : 0.00   Min.   :0.100  
##  1st Qu.: 5.100   1st Qu.: 2.800   1st Qu.: 1.60   1st Qu.:0.300  
##  Median : 5.750   Median : 3.000   Median : 4.50   Median :1.300  
##  Mean   : 6.559   Mean   : 3.391   Mean   : 4.45   Mean   :1.207  
##  3rd Qu.: 6.400   3rd Qu.: 3.300   3rd Qu.: 5.10   3rd Qu.:1.800  
##  Max.   :73.000   Max.   :30.000   Max.   :63.00   Max.   :2.500  
##  NA's   :10       NA's   :17       NA's   :19      NA's   :13     
##    Species         
##  Length:150        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
# Collect the rows that break either rule: a sepal width that is not strictly
# positive, or a sepal length above 30. which() leaves out rows where the
# check evaluates to NA. Then report how many rows were collected.
rules_violate <- dirty_iris[
  which(dirty_iris$Sepal.Width <= 0 | dirty_iris$Sepal.Length > 30), ,
  drop = FALSE
]
nrow(rules_violate)
## [1] 4
# Find the row positions with a negative sepal width, replace those widths
# with their absolute value, and display the affected rows.
neg_indicator <- which(dirty_iris[["Sepal.Width"]] < 0)
dirty_iris[["Sepal.Width"]][neg_indicator] <- abs(
  dirty_iris[neg_indicator, "Sepal.Width"]
)
dirty_iris[neg_indicator, ]
##    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 16            5           3          3.5           1 versicolor
# Record which rows have a sepal width of exactly zero and set those entries
# to NA.
zero_indicator <- which(dirty_iris[["Sepal.Width"]] == 0)
is.na(dirty_iris$Sepal.Width) <- zero_indicator