Cargando los datos

# Read the CSV into a data frame, treating empty strings as missing (NA).
data <- utils::read.csv(
  file = "C:/Users/LUIS 1/Desktop/MachineLearningR/data/t1/missing-data.csv",
  na.strings = ""
)
# Compact overview of the column types and the first few values.
utils::str(data)
## 'data.frame':    27 obs. of  3 variables:
##  $ Income    : int  89800 47500 45000 44700 59500 NA 63300 52900 78200 145100 ...
##  $ Phone_type: chr  "Android" "Android" "iPhone" NA ...
##  $ Car_type  : chr  "Luxury" "Non-Luxury" "Luxury" "Luxury" ...
# Print the first 22 rows.
utils::head(data, n = 22)
##    Income Phone_type   Car_type
## 1   89800    Android     Luxury
## 2   47500    Android Non-Luxury
## 3   45000     iPhone     Luxury
## 4   44700       <NA>     Luxury
## 5   59500     iPhone     Luxury
## 6      NA    Android Non-Luxury
## 7   63300     iPhone Non-Luxury
## 8   52900    Android     Luxury
## 9   78200    Android     Luxury
## 10 145100     iPhone     Luxury
## 11  88600     iPhone Non-Luxury
## 12  65600     iPhone     Luxury
## 13     NA    Android Non-Luxury
## 14  94600    Android     Luxury
## 15  59400     iPhone     Luxury
## 16  47300     iPhone Non-Luxury
## 17  72100       <NA>     Luxury
## 18      0     iPhone Non-Luxury
## 19      0    Android     Luxury
## 20  83000     iPhone     Luxury
## 21  64100    Android Non-Luxury
## 22  42100     iPhone Non-Luxury
# Drop every row that has an NA in any column, then preview the result.
data.cleaned <- stats::na.omit(object = data)
utils::head(data.cleaned, n = 6)
##   Income Phone_type   Car_type
## 1  89800    Android     Luxury
## 2  47500    Android Non-Luxury
## 3  45000     iPhone     Luxury
## 5  59500     iPhone     Luxury
## 7  63300     iPhone Non-Luxury
## 8  52900    Android     Luxury
# Row 4 has a missing phone type but a recorded income.
is.na(data[4, "Phone_type"])
## [1] TRUE
is.na(data[4, "Income"])
## [1] FALSE
# Element-wise missingness flags for the whole Income column.
is.na(data[["Income"]])
##  [1] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## [13]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE

Limpiar los NA solamente de la variable Income

# Keep only the rows whose Income is present; rows with NA in other columns
# are retained.
data.income.cleaned <- data[!is.na(data[["Income"]]), , drop = FALSE]
utils::head(data.income.cleaned, n = 22)
##    Income Phone_type   Car_type
## 1   89800    Android     Luxury
## 2   47500    Android Non-Luxury
## 3   45000     iPhone     Luxury
## 4   44700       <NA>     Luxury
## 5   59500     iPhone     Luxury
## 7   63300     iPhone Non-Luxury
## 8   52900    Android     Luxury
## 9   78200    Android     Luxury
## 10 145100     iPhone     Luxury
## 11  88600     iPhone Non-Luxury
## 12  65600     iPhone     Luxury
## 14  94600    Android     Luxury
## 15  59400     iPhone     Luxury
## 16  47300     iPhone Non-Luxury
## 17  72100       <NA>     Luxury
## 18      0     iPhone Non-Luxury
## 19      0    Android     Luxury
## 20  83000     iPhone     Luxury
## 21  64100    Android Non-Luxury
## 22  42100     iPhone Non-Luxury
## 23      0     iPhone     Luxury
## 24  91500     iPhone Non-Luxury

Filas completas para un data frame

# Logical vector: TRUE for rows with no missing value in any column.
stats::complete.cases(data)
##  [1]  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [13] FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [25]  TRUE  TRUE  TRUE
# Use that vector as a row filter to keep only the complete rows.
data.cleaned.2 <- data[stats::complete.cases(data), , drop = FALSE]

utils::head(data.cleaned.2, n = 6)
##   Income Phone_type   Car_type
## 1  89800    Android     Luxury
## 2  47500    Android Non-Luxury
## 3  45000     iPhone     Luxury
## 5  59500     iPhone     Luxury
## 7  63300     iPhone Non-Luxury
## 8  52900    Android     Luxury

Convertir los ceros de ingresos en NA

# Recode zero incomes as missing.
# `data$Income == 0` evaluates to NA for rows whose income is already missing,
# so the comparison is wrapped in which(): it returns only the integer
# positions that are TRUE, keeping NA out of the subscript used for assignment.
# The surrounding parentheses make R print the value of the assignment.
# NOTE(review): data.cleaned, data.income.cleaned and data.cleaned.2 are built
# before this step, so they still contain the zero incomes.
(data$Income[which(data$Income == 0)] <- NA)
## [1] NA
# First 22 incomes after the recoding.
head(data$Income, 22)
##  [1]  89800  47500  45000  44700  59500     NA  63300  52900  78200 145100
## [11]  88600  65600     NA  94600  59400  47300  72100     NA     NA  83000
## [21]  64100  42100

Medidas de centralización y dispersión

# Without na.rm, the missing incomes propagate and the mean is NA.
mean(data[["Income"]])
## [1] NA
# Drop missing values before averaging.
mean(data[["Income"]], na.rm = TRUE)
## [1] 65763.64
# Same pattern for the standard deviation: NA unless missing values are removed.
stats::sd(data[["Income"]])
## [1] NA
stats::sd(data[["Income"]], na.rm = TRUE)
## [1] 26715.87