You do NOT have to fill in the missing values. Just leave them as
NA.
# Per-column summary of the raw data; both columns are reported as character.
summary(chickens)
## weight feed
## Length:71 Length:71
## Class :character Class :character
## Mode :character Mode :character
# Compact structure: dimensions, column types and the first few values.
str(chickens)
## 'data.frame': 71 obs. of 2 variables:
## $ weight: chr "206" "140" NA "318" ...
## $ feed : chr "meatmeal" "horsebean" NA "sunflower" ...
# Open the data in the viewer, then print the first six rows.
# NOTE(review): view() is not base R (base has View()); presumably this is
# tibble::view() via the tidyverse -- confirm the package is attached.
view(chickens)
head(chickens)
## weight feed
## 1 206 meatmeal
## 2 140 horsebean
## 3 <NA> <NA>
## 4 318 sunflower
## 5 332 casein
## 6 na horsebean
# Structure check repeated; chickens has not been modified since the last call.
str(chickens)
## 'data.frame': 71 obs. of 2 variables:
## $ weight: chr "206" "140" NA "318" ...
## $ feed : chr "meatmeal" "horsebean" NA "sunflower" ...
Calculate how many elements in the original ch_df are recognized as
NA by R.
# Logical matrix (one row per observation, one column per variable) that is
# TRUE only where R already stores a real NA; text such as "na" is FALSE here.
is.na(chickens)
## weight feed
## [1,] FALSE FALSE
## [2,] FALSE FALSE
## [3,] TRUE TRUE
## [4,] FALSE FALSE
## [5,] FALSE FALSE
## [6,] FALSE FALSE
## [7,] FALSE FALSE
## [8,] FALSE FALSE
## [9,] FALSE FALSE
## [10,] FALSE FALSE
## [11,] FALSE FALSE
## [12,] FALSE FALSE
## [13,] FALSE FALSE
## [14,] FALSE FALSE
## [15,] FALSE FALSE
## [16,] FALSE FALSE
## [17,] FALSE FALSE
## [18,] FALSE FALSE
## [19,] FALSE FALSE
## [20,] FALSE FALSE
## [21,] FALSE FALSE
## [22,] FALSE FALSE
## [23,] FALSE FALSE
## [24,] FALSE FALSE
## [25,] FALSE FALSE
## [26,] FALSE FALSE
## [27,] FALSE FALSE
## [28,] FALSE FALSE
## [29,] FALSE FALSE
## [30,] TRUE FALSE
## [31,] FALSE FALSE
## [32,] FALSE FALSE
## [33,] FALSE FALSE
## [34,] FALSE FALSE
## [35,] FALSE FALSE
## [36,] FALSE FALSE
## [37,] FALSE FALSE
## [38,] FALSE FALSE
## [39,] FALSE FALSE
## [40,] FALSE FALSE
## [41,] FALSE FALSE
## [42,] FALSE FALSE
## [43,] FALSE TRUE
## [44,] FALSE FALSE
## [45,] FALSE FALSE
## [46,] FALSE FALSE
## [47,] FALSE FALSE
## [48,] FALSE FALSE
## [49,] FALSE FALSE
## [50,] FALSE FALSE
## [51,] FALSE FALSE
## [52,] TRUE FALSE
## [53,] FALSE FALSE
## [54,] FALSE FALSE
## [55,] FALSE FALSE
## [56,] FALSE FALSE
## [57,] FALSE FALSE
## [58,] FALSE FALSE
## [59,] FALSE FALSE
## [60,] TRUE FALSE
## [61,] FALSE FALSE
## [62,] FALSE FALSE
## [63,] FALSE FALSE
## [64,] FALSE FALSE
## [65,] FALSE FALSE
## [66,] FALSE FALSE
## [67,] FALSE FALSE
## [68,] FALSE FALSE
## [69,] FALSE TRUE
## [70,] FALSE FALSE
## [71,] FALSE FALSE
# Positions of the NA cells as column-major linear indices into the 71 x 2
# matrix: values up to 71 are rows of weight, values above 71 are rows of feed
# offset by 71 (e.g. 74 is row 3 of feed).
which(is.na(chickens))
## [1] 3 30 52 60 74 114 140
# TRUE counts as 1, so the sum is the number of cells recognised as NA.
sum(is.na(chickens))
## [1] 7
Change all of the missing elements to NA in ch_df.
# Work on a copy so the original chickens data stays untouched.
mis_ele_na <- chickens
# First pass: empty strings, single spaces and the literal text "na" become NA.
# Each column is processed on its own; %in% never returns NA, so cells that are
# already missing are simply left as they are.
blank_tokens <- c("", " ", "na")
mis_ele_na[] <- lapply(
  mis_ele_na,
  function(column) replace(column, column %in% blank_tokens, NA)
)
mis_ele_na
## weight feed
## 1 206 meatmeal
## 2 140 horsebean
## 3 <NA> <NA>
## 4 318 sunflower
## 5 332 casein
## 6 <NA> horsebean
## 7 216 <NA>
## 8 143 horsebean
## 9 271 soybean
## 10 315 meatmeal
## 11 227 horsebean
## 12 N/A sunflower
## 13 322 sunflower
## 14 352 casein
## 15 329 not sure
## 16 N/A linseed
## 17 379 casein
## 18 153 ?
## 19 N/A linseed
## 20 213 linseed
## 21 257 <NA>
## 22 179 horsebean
## 23 380 meatmeal
## 24 327 soybean
## 25 260 linseed
## 26 168 horsebean
## 27 248 soybean
## 28 181 linseed
## 29 160 horsebean
## 30 <NA> sunflower
## 31 <NA> soybean
## 32 340 sunflower
## 33 260 casein
## 34 169 ?
## 35 171 soybean
## 36 368 casein
## 37 283 casein
## 38 334 sunflower
## 39 - unknown
## 40 309 linseed
## 41 <NA> soybean
## 42 295 ?
## 43 404 <NA>
## 44 392 sunflower
## 45 <NA> casein
## 46 267 soybean
## 47 303 meatmeal
## 48 250 soybean
## 49 243 soybean
## 50 108 horsebean
## 51 229 linseed
## 52 <NA> horsebean
## 53 222 casein
## 54 344 meatmeal
## 55 263 unknown
## 56 148 linseed
## 57 318 casein
## 58 - meatmeal
## 59 258 meatmeal
## 60 <NA> sunflower
## 61 325 meatmeal
## 62 217 <NA>
## 63 271 linseed
## 64 244 linseed
## 65 341 sunflower
## 66 141 ?
## 67 158 soybean
## 68 423 sunflower
## 69 316 <NA>
## 70 <NA> soybean
## 71 <NA> casein
# Inspect the partially cleaned data before the remaining placeholders are
# handled.
view(mis_ele_na)

# Second pass: convert every remaining placeholder for a missing value to NA.
# Besides "-" and "?", the data still contains "N/A" in weight (e.g. row 12)
# and "not sure" / "unknown" in feed (e.g. rows 15 and 39), which would
# otherwise survive the cleanup. %in% is used instead of == because it never
# returns NA, so rows that are already NA are left alone.
mis_ele_na <- mis_ele_na %>%
  mutate(
    weight = replace(weight, weight %in% c("-", "N/A"), NA),
    feed = replace(feed, feed %in% c("?", " ", "not sure", "unknown"), NA)
  )

# Inspect the final result: every missing element should now show as NA.
view(mis_ele_na)