# Start from an empty workspace: remove every object in the global environment.
# NOTE(review): `mydata` is printed immediately afterwards, so it has to be
# created by code that is not visible here — confirm where it is loaded.
rm(list = ls())
# Print the full data set (the `##`-prefixed lines below appear to be its output).
mydata
##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1            6.4         3.2        4.500         1.5 versicolor
## 2            6.3         3.3        6.000         2.5  virginica
## 3            6.2          NA        5.400         2.3  virginica
## 4            5.0         3.4        1.600         0.4     setosa
## 5            5.7         2.6        3.500         1.0 versicolor
## 6            5.3          NA           NA         0.2     setosa
## 7            6.4         2.7        5.300          NA  virginica
## 8            5.9         3.0        5.100         1.8  virginica
## 9            5.8         2.7        4.100         1.0 versicolor
## 10           4.8         3.1        1.600         0.2     setosa
## 11           5.0         3.5        1.600         0.6     setosa
## 12           6.0         2.7        5.100         1.6 versicolor
## 13           6.0         3.0        4.800          NA  virginica
## 14           6.8         2.8        4.800         1.4 versicolor
## 15            NA         3.9        1.700         0.4     setosa
## 16           5.0        -3.0        3.500         1.0 versicolor
## 17           5.5          NA        4.000         1.3 versicolor
## 18           4.7         3.2        1.300         0.2     setosa
## 19            NA         4.0           NA         0.2     setosa
## 20           5.6          NA        4.200         1.3 versicolor
## 21           4.9         3.6           NA         0.1     setosa
## 22           5.4          NA        4.500         1.5 versicolor
## 23           6.2         2.8           NA         1.8  virginica
## 24           6.7         3.3        5.700         2.5  virginica
## 25            NA         3.0        5.900         2.1  virginica
## 26           4.6         3.2        1.400         0.2     setosa
## 27           4.9         3.1        1.500         0.1     setosa
## 28          73.0        29.0       63.000          NA  virginica
## 29           6.5         3.2        5.100         2.0  virginica
## 30            NA         2.8        0.820         1.3 versicolor
## 31           4.4         3.2           NA         0.2     setosa
## 32           5.9         3.2        4.800          NA versicolor
## 33           5.7         2.8        4.500         1.3 versicolor
## 34           6.2         2.9           NA         1.3 versicolor
## 35           6.6         2.9       23.000         1.3 versicolor
## 36           4.8         3.0        1.400         0.1     setosa
## 37           6.5         3.0        5.500         1.8  virginica
## 38           6.2         2.2        4.500         1.5 versicolor
## 39           6.7         2.5        5.800         1.8  virginica
## 40           5.0         3.0        1.600         0.2     setosa
## 41           5.0          NA        1.200         0.2     setosa
## 42           5.8         2.7        3.900         1.2 versicolor
## 43           0.0          NA        1.300         0.4     setosa
## 44           5.8         2.7        5.100         1.9  virginica
## 45           5.5         4.2        1.400         0.2     setosa
## 46           7.7         2.8        6.700         2.0  virginica
## 47           5.7          NA           NA         0.4     setosa
## 48           7.0         3.2        4.700         1.4 versicolor
## 49           6.5         3.0        5.800         2.2  virginica
## 50           6.0         3.4        4.500         1.6 versicolor
## 51           5.5         2.6        4.400         1.2 versicolor
## 52           4.9         3.1           NA         0.2     setosa
## 53           5.2         2.7        3.900         1.4 versicolor
## 54           4.8         3.4        1.600         0.2     setosa
## 55           6.3         3.3        4.700         1.6 versicolor
## 56           7.7         3.8        6.700         2.2  virginica
## 57           5.1         3.8        1.500         0.3     setosa
## 58            NA         2.9        4.500         1.5 versicolor
## 59           6.4         2.8        5.600          NA  virginica
## 60           6.4         2.8        5.600         2.1  virginica
## 61           5.0         2.3        3.300          NA versicolor
## 62           7.4         2.8        6.100         1.9  virginica
## 63           4.3         3.0        1.100         0.1     setosa
## 64           5.0         3.3        1.400         0.2     setosa
## 65           7.2         3.0        5.800         1.6  virginica
## 66           6.3         2.5        4.900         1.5 versicolor
## 67           5.1         2.5           NA         1.1 versicolor
## 68            NA         3.2        5.700         2.3  virginica
## 69           5.1         3.5           NA          NA     setosa
## 70           5.0         3.5        1.300         0.3     setosa
## 71           6.1         3.0        4.600         1.4 versicolor
## 72           6.9         3.1        5.100         2.3  virginica
## 73           5.1         3.5        1.400         0.3     setosa
## 74           6.5          NA        4.600         1.5 versicolor
## 75           5.6         2.8        4.900         2.0  virginica
## 76           4.9         2.5        4.500          NA  virginica
## 77           5.5         3.5        1.300         0.2     setosa
## 78           7.6         3.0        6.600         2.1  virginica
## 79           5.1         3.8        0.000         0.2     setosa
## 80           7.9         3.8        6.400         2.0  virginica
## 81           6.1         2.6        5.600         1.4  virginica
## 82           5.4         3.4        1.700         0.2     setosa
## 83           6.1         2.9        4.700         1.4 versicolor
## 84           5.4         3.7        1.500         0.2     setosa
## 85           6.7         3.0        5.200         2.3  virginica
## 86           5.1         3.8        1.900         Inf     setosa
## 87           6.4         2.9        4.300         1.3 versicolor
## 88           5.7         2.9        4.200         1.3 versicolor
## 89           4.4         2.9        1.400         0.2     setosa
## 90           6.3         2.5        5.000         1.9  virginica
## 91           7.2         3.2        6.000         1.8  virginica
## 92           4.9          NA        3.300         1.0 versicolor
## 93           5.2         3.4        1.400         0.2     setosa
## 94           5.8         2.7        5.100         1.9  virginica
## 95           6.0         2.2        5.000         1.5  virginica
## 96           6.9         3.1           NA         1.5 versicolor
## 97           5.5         2.3        4.000         1.3 versicolor
## 98           6.7          NA        5.000         1.7 versicolor
## 99           5.7         3.0        4.200         1.2 versicolor
## 100          6.3         2.8        5.100         1.5  virginica
## 101          5.4         3.4        1.500         0.4     setosa
## 102          7.2         3.6           NA         2.5  virginica
## 103          6.3         2.7        4.900          NA  virginica
## 104          5.6         3.0        4.100         1.3 versicolor
## 105          5.1         3.7           NA         0.4     setosa
## 106          5.5          NA        0.925         1.0 versicolor
## 107          6.5         3.0        5.200         2.0  virginica
## 108          4.8         3.0        1.400          NA     setosa
## 109          6.1         2.8           NA         1.3 versicolor
## 110          4.6         3.4        1.400         0.3     setosa
## 111          6.3         3.4           NA         2.4  virginica
## 112          5.0         3.4        1.500         0.2     setosa
## 113          5.1         3.4        1.500         0.2     setosa
## 114           NA         3.3        5.700         2.1  virginica
## 115          6.7         3.1        4.700         1.5 versicolor
## 116          7.7         2.6        6.900         2.3  virginica
## 117          6.3          NA        4.400         1.3 versicolor
## 118          4.6         3.1        1.500         0.2     setosa
## 119           NA         3.0        5.500         2.1  virginica
## 120           NA         2.8        4.700         1.2 versicolor
## 121          5.9         3.0           NA         1.5 versicolor
## 122          4.5         2.3        1.300         0.3     setosa
## 123          6.4         3.2        5.300         2.3  virginica
## 124          5.2         4.1        1.500         0.1     setosa
## 125         49.0        30.0       14.000         2.0     setosa
## 126          5.6         2.9        3.600         1.3 versicolor
## 127          6.8         3.2        5.900         2.3  virginica
## 128          5.8          NA        5.100         2.4  virginica
## 129          4.6         3.6           NA         0.2     setosa
## 130          5.7         0.0        1.700         0.3     setosa
## 131          5.6         2.5        3.900         1.1 versicolor
## 132          6.7         3.1        4.400         1.4 versicolor
## 133          4.8          NA        1.900         0.2     setosa
## 134          5.1         3.3        1.700         0.5     setosa
## 135          4.4         3.0        1.300          NA     setosa
## 136          7.7         3.0           NA         2.3  virginica
## 137          4.7         3.2        1.600         0.2     setosa
## 138           NA         3.0        4.900         1.8  virginica
## 139          6.9         3.1        5.400         2.1  virginica
## 140          6.0         2.2        4.000         1.0 versicolor
## 141          5.0          NA        1.400         0.2     setosa
## 142          5.5          NA        3.800         1.1 versicolor
## 143          6.6         3.0        4.400         1.4 versicolor
## 144          6.3         2.9        5.600         1.8  virginica
## 145          5.7         2.5        5.000         2.0  virginica
## 146          6.7         3.1        5.600         2.4  virginica
## 147          5.6         3.0        4.500         1.5 versicolor
## 148          5.2         3.5        1.500         0.2     setosa
## 149          6.4         3.1           NA         1.8  virginica
## 150          5.8         2.6        4.000          NA versicolor

Detecting NA

# How many rows of mydata have a value in every column?
sum(complete.cases(mydata))
# The same count expressed as a percentage of all rows in mydata.
sum(complete.cases(mydata)) / nrow(mydata) * 100
# Flag the "special" entries of a vector.
#   x: an atomic vector (typically one column of a data frame).
# Returns a logical vector the same length as x. For numeric input an entry is
# TRUE when it is not finite (NA, NaN, Inf, -Inf); for any other type an entry
# is TRUE when it is NA.
is.special <- function(x){
  if (!is.numeric(x)) {
    return(is.na(x))
  }
  !is.finite(x)
}

# Show, column by column, which entries of mydata are flagged by is.special().
vapply(mydata, is.special, logical(nrow(mydata)))
# Overwrite every flagged entry with NA so that non-finite numbers are treated
# as missing values as well.
for (n in names(mydata)) {
  mydata[[n]][is.special(mydata[[n]])] <- NA
}
# Per-column summary of the data after the replacement.
summary(mydata)

Exploring NA’s

vis_miss(airquality)
# Create a visualization of the missing values in the airquality dataset.

Patterns

gg_miss_upset(airquality)
# Display the patterns of missing values in the airquality dataset.
gg_miss_upset(riskfactors)
# Display the patterns of missing values in the riskfactors dataset.

NA’s Mechanisms

# Scatter plot drawn with geom_miss_point() (note: not the regular geom_point()).
ggplot(airquality,
       aes(x = Ozone,
           y = Solar.R)) +
geom_miss_point()
# Create a scatter plot visualizing missing values in the 'Ozone' and 'Solar.R' variables of the airquality dataset.

NA’s in vars

gg_miss_var(airquality)
# Create a plot showing how the missing values are distributed across the variables of the airquality dataset.
gg_miss_var(airquality) + labs(y = "Look at all the missing ones")
# Draw the same plot again, this time with a custom y-axis label.

NA’s in cases

gg_miss_case(airquality) + labs(x = "Number of Cases")
# Create a plot to visualize the number of missing values across cases in the airquality dataset, with a custom x-axis label.

NA’s across factors

gg_miss_fct(x = riskfactors, fct = marital)
# Create a plot of the missing values in the variables of the riskfactors dataset, broken down by the levels of the 'marital' factor.

Summaries for NA’s

miss_var_summary(airquality)
# Generate a per-variable summary of the missing values in the airquality dataset.
airquality %>%
  group_by(Month) %>%
  miss_var_summary()
# Generate the same summary for the airquality dataset grouped by the 'Month' variable.

Identify outliers

# Work on a data-frame copy of ToothGrowth in which dose is a factor.
tooth <- as.data.frame(ToothGrowth)
tooth[["dose"]] <- as.factor(tooth[["dose"]])
# Box plot of 'len' for each level of 'dose', coloured by dose, to spot outliers visually.
p <- ggplot(data = tooth, mapping = aes(x = dose, y = len, color = dose)) +
  geom_boxplot()
p
# List the outliers of 'len' within each 'dose' group.
identify_outliers(group_by(tooth, dose), len)

Checking consistency

# Consistency rules that every record of mydata is expected to satisfy.
RULE <- editset(c(
  "Sepal.Length <= 30",
  "Species %in% c('setosa','versicolor','virginica')",
  "Sepal.Length > 0",
  "Sepal.Width > 0",
  "Petal.Length > 0",
  "Petal.Width > 0",
  "Petal.Length >= 2 * Petal.Width",
  "Sepal.Length>Petal.Length"
))
RULE
# Check mydata against the rules once and keep the result in `violated`, so the
# summary and the plot below share the same computation instead of repeating it.
violated <- violatedEdits(RULE, mydata)
summary(violated)
# Plot the rule violations found in mydata (no corrections have been applied yet
# at this point).
plot(violated)

Corrections

# Correction rule: a Sepal.Width that is present but not positive is replaced by NA.
cr <- correctionRules(expression(
  if (!is.na(Sepal.Width) && Sepal.Width <=0 ) Sepal.Width = NA
  ))
# correctWithRules() does not modify its input; it returns a result object
# holding the corrected data and a log of the corrections. Keep the result,
# show it, and write the corrected data back so the rule really is applied.
corrected <- correctWithRules(cr, mydata)
corrected
mydata <- corrected$corrected
# Blank (set to NA) every field that localizeErrors() marks as needing
# adaptation, then check whether any rule is still violated.
mydata[localizeErrors(RULE, mydata)$adapt] <- NA
any(violatedEdits(RULE,mydata), na.rm=TRUE)

dlookr package

# Take a working copy of the Carseats data through the ISLR namespace, so this
# does not depend on the package being attached, and display its structure.
carseats <- ISLR::Carseats
str(carseats)
# Switch to the random number generator settings of R 3.5.0 so the seeds below
# select the same rows regardless of the R version in use. The setting persists,
# so a single call covers both draws.
suppressWarnings(RNGversion("3.5.0"))
# Introduce 20 missing values into 'Income'.
set.seed(123)
carseats[sample(seq_len(NROW(carseats)), 20), "Income"] <- NA
# Introduce 10 missing values into 'Urban'.
set.seed(456)
carseats[sample(seq_len(NROW(carseats)), 10), "Urban"] <- NA

Imputations of NA’s

income <- imputate_na(carseats, Income, US, method = "rpart")
  summary(income)
# Impute the missing values of 'Income' in carseats with method = "rpart" and display a summary of the imputation.
plot(income)  # visualization of the imputation
# Visualize the imputation of the missing values of the 'Income' variable.
library(mice)
urban <- imputate_na(carseats, Urban, US, method = "mice")
# Impute the missing values of the 'Urban' variable in carseats with method = "mice" (the mice package is attached just above).
urban 
# Display the result of the imputation for the 'Urban' variable.
summary(urban) 
# Summarize the imputation for the 'Urban' variable.
plot(urban) 
# Visualize the imputation of the missing values of the 'Urban' variable.

Standardization

# Rescale Income and Sales with method = "minmax", keep only the two rescaled
# columns and draw them side by side in one box plot.
carseats %>%
  mutate(
    Income_minmax = transform(Income, method = "minmax"),
    Sales_minmax = transform(Sales, method = "minmax")
  ) %>%
  select(Income_minmax, Sales_minmax) %>%
  boxplot()

Binning

bin <- binning(carseats$Income) # Bin the Income variable; no type is given, so the default is used (stated as "quantile" in the original note)
bin # Print bins class object
# Create bins for the 'Income' variable of carseats with the default settings and print the resulting bins object.
summary(bin) 
# Summarize the bins object created for the 'Income' variable.
plot(bin) 
# Plot the bins object created for the 'Income' variable.
 carseats %>%
 mutate(Income_bin = binning(carseats$Income) %>% 
                     extract()) %>%
 group_by(ShelveLoc, Income_bin) %>%
 summarise(freq = n()) %>%
 arrange(desc(freq)) %>%
 head(10)
 # Frequency table of the 'ShelveLoc' x 'Income_bin' combinations, sorted by descending frequency and cut to the first 10 rows.
income_fixed<- binning(carseats$Income, nbins = 4,
                   labels = c("low", "average", "high", "very high"))
summary(income_fixed)
# Bin 'Income' into 4 bins with the given labels and display a summary of the result.
plot(income_fixed)
# Visualize the labelled bins of the 'Income' variable.
bin <- binning_by(carseats, "US", "Advertising")
# Create bins for the 'Advertising' variable using 'US' as the second argument of binning_by(); note that this overwrites the earlier `bin` object.
summary(bin)
# Summarize the bins object created for the 'Advertising' variable.
plot(bin)
# Plot the bins object created for the 'Advertising' variable.

Exercise 1.

Find out which observations have too long sepals using the result of violatedEdits.

# solution for the exercise 1 here ;-)
# if (length(violated) > 0) {
#   print(mydata[violated, ])
# }
# The code is left commented out because RStudio cannot find `violated` during knitting.

Exercise 3.

# Exercise 3: compare the mean of Income before and after imputation.
# Impute the missing 'Income' values of carseats with method = "mice".
MICE <- imputate_na(carseats, Income, Education, method = "mice")
## 
##  iter imp variable
##   1   1  Income  Urban
##   1   2  Income  Urban
##   1   3  Income  Urban
##   1   4  Income  Urban
##   1   5  Income  Urban
##   2   1  Income  Urban
##   2   2  Income  Urban
##   2   3  Income  Urban
##   2   4  Income  Urban
##   2   5  Income  Urban
##   3   1  Income  Urban
##   3   2  Income  Urban
##   3   3  Income  Urban
##   3   4  Income  Urban
##   3   5  Income  Urban
##   4   1  Income  Urban
##   4   2  Income  Urban
##   4   3  Income  Urban
##   4   4  Income  Urban
##   4   5  Income  Urban
##   5   1  Income  Urban
##   5   2  Income  Urban
##   5   3  Income  Urban
##   5   4  Income  Urban
##   5   5  Income  Urban
# Summarize the imputation; the output below lists statistics (including the mean) for the original and the imputed values.
summary(MICE)
## * Impute missing values based on Multivariate Imputation by Chained Equations
##  - method : mice
##  - random seed : 37295
## 
## * Information of Imputation (before vs after)
##                     Original     Imputation  
## described_variables "value"      "value"     
## n                   "380"        "400"       
## na                  "20"         " 0"        
## mean                "68.86053"   "68.98650"  
## sd                  "28.09161"   "27.52111"  
## se_mean             "1.441069"   "1.376055"  
## IQR                 "48.25"      "45.25"     
## skewness            "0.04490600" "0.03761104"
## kurtosis            "-1.089201"  "-1.022505" 
## p00                 "21"         "21"        
## p01                 "21.79"      "21.99"     
## p05                 "26"         "26"        
## p10                 "30.0"       "30.9"      
## p20                 "39"         "40"        
## p25                 "42.75"      "44.75"     
## p30                 "48"         "52"        
## p40                 "62.00"      "62.36"     
## p50                 "69"         "69"        
## p60                 "78.00"      "77.32"     
## p70                 "86.3"       "84.0"      
## p75                 "91"         "90"        
## p80                 "96.2"       "95.2"      
## p90                 "108.1"      "106.1"     
## p95                 "115.05"     "115.00"    
## p99                 "119.21"     "119.01"    
## p100                "120"        "120"
 # Convert the imputation result to a data frame (overwriting MICE) and print every row.
 MICE <- as.data.frame(MICE)
 MICE 
##      MICE
## 1    73.0
## 2    48.0
## 3    35.0
## 4   100.0
## 5    64.0
## 6   113.0
## 7   105.0
## 8    81.0
## 9   110.0
## 10  113.0
## 11   78.0
## 12   94.0
## 13   35.0
## 14   28.0
## 15  117.0
## 16   95.0
## 17   66.2
## 18   62.6
## 19  110.0
## 20   76.0
## 21   90.0
## 22   29.0
## 23   46.0
## 24   31.0
## 25  119.0
## 26   32.0
## 27  115.0
## 28  118.0
## 29   74.0
## 30   99.0
## 31   94.0
## 32   58.0
## 33   32.0
## 34   38.0
## 35   54.0
## 36   84.0
## 37   76.0
## 38   41.0
## 39   73.0
## 40   70.8
## 41   98.0
## 42   53.0
## 43   69.0
## 44   42.0
## 45   79.0
## 46   63.0
## 47   90.0
## 48   98.0
## 49   52.0
## 50   93.0
## 51   32.0
## 52   90.0
## 53   40.0
## 54   64.0
## 55  103.0
## 56   81.0
## 57   82.0
## 58   91.0
## 59   93.0
## 60   71.0
## 61  102.0
## 62   32.0
## 63   45.0
## 64   88.0
## 65   67.0
## 66   26.0
## 67   92.0
## 68   61.0
## 69   69.0
## 70   59.0
## 71   81.0
## 72   51.0
## 73   45.0
## 74   90.0
## 75   68.0
## 76  111.0
## 77   87.0
## 78   71.0
## 79   48.0
## 80   67.0
## 81  100.0
## 82   72.0
## 83   83.0
## 84   36.0
## 85   25.0
## 86  103.0
## 87   84.0
## 88   67.0
## 89   42.0
## 90   66.0
## 91   22.0
## 92   46.0
## 93  113.0
## 94   30.0
## 95   53.8
## 96   25.0
## 97   42.0
## 98   82.0
## 99   77.0
## 100  47.0
## 101  69.0
## 102  93.0
## 103  22.0
## 104  91.0
## 105  96.0
## 106 100.0
## 107  33.0
## 108 107.0
## 109  79.0
## 110  65.0
## 111  62.0
## 112 118.0
## 113  99.0
## 114  29.0
## 115  87.0
## 116  60.8
## 117  75.0
## 118  53.0
## 119  88.0
## 120  94.0
## 121 105.0
## 122  89.0
## 123 100.0
## 124 103.0
## 125 113.0
## 126  88.2
## 127  68.0
## 128  48.0
## 129 100.0
## 130 120.0
## 131  84.0
## 132  69.0
## 133  87.0
## 134  98.0
## 135  31.0
## 136  94.0
## 137  75.0
## 138  42.0
## 139 103.0
## 140  62.0
## 141  60.0
## 142  42.0
## 143  84.0
## 144  88.0
## 145  68.0
## 146  63.0
## 147  83.0
## 148  54.0
## 149 119.0
## 150 120.0
## 151  84.0
## 152  58.0
## 153  78.0
## 154  36.0
## 155  69.0
## 156  72.0
## 157  34.0
## 158  58.0
## 159  90.0
## 160  60.0
## 161  28.0
## 162  21.0
## 163  80.4
## 164  64.0
## 165  64.0
## 166  58.0
## 167  67.0
## 168  73.0
## 169  89.0
## 170  41.0
## 171  39.0
## 172 106.0
## 173 102.0
## 174  91.0
## 175  24.0
## 176  89.0
## 177  77.8
## 178  72.0
## 179  95.0
## 180  25.0
## 181 112.0
## 182  83.0
## 183  60.0
## 184  74.0
## 185  33.0
## 186 100.0
## 187  51.0
## 188  32.0
## 189  37.0
## 190 117.0
## 191  37.0
## 192  42.0
## 193  26.0
## 194  70.0
## 195  98.0
## 196  93.0
## 197  28.0
## 198  61.0
## 199  80.0
## 200  88.0
## 201  92.0
## 202  83.0
## 203  78.0
## 204  82.0
## 205  80.0
## 206  22.0
## 207  67.0
## 208 105.0
## 209 100.2
## 210  21.0
## 211  41.0
## 212 118.0
## 213  69.0
## 214  84.0
## 215 115.0
## 216  83.0
## 217  71.2
## 218  44.0
## 219  61.0
## 220  79.0
## 221 120.0
## 222  72.2
## 223 119.0
## 224  45.0
## 225  82.0
## 226  25.0
## 227  33.0
## 228  64.0
## 229  73.0
## 230 104.0
## 231  60.0
## 232  69.0
## 233  80.0
## 234  76.0
## 235  62.0
## 236  32.0
## 237  34.0
## 238  28.0
## 239  24.0
## 240 105.0
## 241  80.0
## 242  63.0
## 243  46.0
## 244  25.0
## 245  30.0
## 246  43.0
## 247  56.0
## 248 114.0
## 249  52.0
## 250  67.0
## 251 105.0
## 252 111.0
## 253  97.0
## 254  24.0
## 255 104.0
## 256  81.0
## 257  40.0
## 258  62.0
## 259  38.0
## 260  36.0
## 261 117.0
## 262  42.0
## 263  60.4
## 264  26.0
## 265  29.0
## 266  35.0
## 267  93.0
## 268  82.0
## 269  57.0
## 270  69.0
## 271  26.0
## 272  56.0
## 273  33.0
## 274 106.0
## 275  93.0
## 276 119.0
## 277  69.0
## 278  48.0
## 279 113.0
## 280  57.0
## 281  86.0
## 282  69.0
## 283  96.0
## 284 110.0
## 285  46.0
## 286  26.0
## 287 118.0
## 288  44.0
## 289  40.0
## 290  77.0
## 291 111.0
## 292  70.0
## 293  66.0
## 294  84.0
## 295  76.0
## 296  35.0
## 297  44.0
## 298  83.0
## 299  63.0
## 300  40.0
## 301  78.0
## 302  93.0
## 303  77.0
## 304  52.0
## 305  98.0
## 306  29.0
## 307  32.0
## 308  92.0
## 309  80.0
## 310 111.0
## 311  65.0
## 312  68.0
## 313 117.0
## 314  81.0
## 315  79.4
## 316  21.0
## 317  36.0
## 318  30.0
## 319  72.0
## 320  45.0
## 321  70.0
## 322  39.0
## 323  50.0
## 324 105.0
## 325  65.0
## 326  69.0
## 327  30.0
## 328  38.0
## 329  66.0
## 330  54.0
## 331  59.0
## 332  63.0
## 333  33.0
## 334  60.0
## 335 117.0
## 336  70.0
## 337  35.0
## 338  38.0
## 339  24.0
## 340  44.0
## 341  29.0
## 342 120.0
## 343 102.0
## 344  42.0
## 345  80.0
## 346  68.0
## 347  65.0
## 348  39.0
## 349 102.0
## 350  27.0
## 351  68.8
## 352 115.0
## 353 103.0
## 354  67.0
## 355  31.0
## 356 100.0
## 357 109.0
## 358  73.0
## 359  96.0
## 360  62.0
## 361  86.0
## 362  25.0
## 363  55.0
## 364  69.8
## 365  21.0
## 366  30.0
## 367  56.0
## 368 106.0
## 369  22.0
## 370 100.0
## 371  41.0
## 372  81.0
## 373  60.2
## 374  71.4
## 375  47.0
## 376  46.0
## 377  60.0
## 378  61.0
## 379  88.0
## 380 111.0
## 381  64.0
## 382  65.0
## 383  28.0
## 384 117.0
## 385  37.0
## 386  73.0
## 387 116.0
## 388  73.0
## 389  89.0
## 390  42.0
## 391  75.0
## 392  63.0
## 393  42.0
## 394  51.0
## 395  58.0
## 396 108.0
## 397  53.4
## 398  26.0
## 399  79.0
## 400  37.0
 # The `mean` row of the summary(MICE) output above shows the mean of Income before (68.86053) and after (68.98650) imputation.

Exercise 4.

# Solution for exercise 4: impute the outliers of 'Price' in carseats with method = "capping"
# and summarize the values before and after the imputation.
Outers <- imputate_outlier(carseats, Price, method = "capping" )
summary(Outers)
## Impute outliers with capping
## 
## * Information of Imputation (before vs after)
##                     Original     Imputation  
## described_variables "value"      "value"     
## n                   "400"        "400"       
## na                  "0"          "0"         
## mean                "115.7950"   "115.8928"  
## sd                  "23.67666"   "22.61092"  
## se_mean             "1.183833"   "1.130546"  
## IQR                 "31"         "31"        
## skewness            "-0.1252862" "-0.0461621"
## kurtosis            " 0.4518850" "-0.3030578"
## p00                 "24"         "54"        
## p01                 "54.99"      "67.96"     
## p05                 "77"         "77"        
## p10                 "87"         "87"        
## p20                 "96.8"       "96.8"      
## p25                 "100"        "100"       
## p30                 "104"        "104"       
## p40                 "110"        "110"       
## p50                 "117"        "117"       
## p60                 "122"        "122"       
## p70                 "128.3"      "128.3"     
## p75                 "131"        "131"       
## p80                 "134"        "134"       
## p90                 "146"        "146"       
## p95                 "155.0500"   "155.0025"  
## p99                 "166.05"     "164.02"    
## p100                "191"        "173"
  # Visualize the outlier imputation of the 'Price' variable.
  plot(Outers)

Exercise 5.

# Solution for exercise 5: split 'Income' into four bins of type "quantile"
# and tabulate how many observations fall into each bin.
binn <- binning(
  carseats$Income,
  nbins = 4,
  type = "quantile"
)
summary(binn)
##          levels freq   rate
## 1 [21,42.41667]   95 0.2375
## 2 (42.41667,69]  102 0.2550
## 3       (69,91]   89 0.2225
## 4      (91,120]   94 0.2350
## 5          <NA>   20 0.0500
# Plot the bins object created for the 'Income' variable.
plot(binn)