In R, missing values are represented by the symbol NA. To identify missing values in a dataset, use the function is.na().
## Example
# ---
# Lets create a dataset dt
# ---
# OUR CODE GOES BELOW
#
# Column vectors for the toy dataset; NA marks an entry that is missing
Name <- c("John", "Tim", NA_character_)
Sex <- c("men", "men", "women")
Age <- c(45, 53, NA_real_)
# Assemble the vectors into a data frame, one column per vector
dt <- data.frame(Name = Name, Sex = Sex, Age = Age)
# Display the assembled dataset
dt
# Flag the missing cells: is.na() yields a logical table shaped like dt,
# with TRUE wherever the value is NA
# ---
#
is.na(x = dt)
# Example
# ---
# Tally the missing values column by column: colSums() adds up the
# TRUE flags produced by is.na()
# ---
# OUR CODE GOES BELOW
#
colSums(is.na(x = dt), na.rm = FALSE)
Use na.omit() to drop every row that contains at least one missing value.
## Example
# ---
# Question: Show all rows from the dataset which don't contain any missing values
# ---
# na.omit() returns a copy of dt without the rows that hold an NA;
# dt itself is not modified because the result is only printed.
#
na.omit(dt)
## Example
# ---
# Question: Recode/fill the missing value in a column with a number
# ---
# The fill is applied to a copy so that dt keeps its NA. Filling dt in place
# here would leave the mean-fill example further down with nothing to replace.
#
dt_constant <- dt
dt_constant$Age[is.na(dt_constant$Age)] <- 9
dt_constant
## Example
# ---
# Question: Recode or fill the missing value in a column with the mean value
# of the column
# ---
# na.rm = TRUE makes mean() ignore the NA; without it the mean would be NA
# and the fill would change nothing.
#
dt$Age[is.na(dt$Age)] <- mean(dt$Age, na.rm = TRUE)
# Print the dt table; the missing Age now holds the mean of the observed ages
dt
## Challenge 1
# ---
# Question: Using the given bus dataset below, recode the missing values of the payment_method
# and travel_to columns with appropriate values
# ---
# OUR CODE GOES BELOW
#
# Lets first of all import our data table
# ---
#
library("data.table")
# Download the bus dataset through its short link
bus_dataset <- fread(input = "http://bit.ly/BusNairobiWesternTransport")
# Preview the first six rows to get a feel for the columns
# --
#
head(bus_dataset, n = 6L)
# Logical table flagging every NA cell of the dataset
is.na(x = bus_dataset)
# Per-column tally of the NA cells
colSums(is.na(x = bus_dataset), na.rm = FALSE)
# The tally reported no NA values, so nothing is recoded here.
# NOTE(review): is.na() only detects true NA; blank strings would not be
# counted as missing - confirm how fread() read any empty fields.
## Challenge 2
# ---
# Question: Clean the given dataset
# ---
# Dataset url = http://bit.ly/MS-PropertyDataset
# ---
# OUR CODE GOES BELOW
#
# Download the property dataset and preview its first rows
property <- fread('http://bit.ly/MS-PropertyDataset')
head(property)
# Count the missing values in each column
colSums(is.na(property))
# The original run reported 4 missing values across multiple columns.
# Drop every row that holds one, and keep the result: na.omit() returns a
# cleaned copy, so without the assignment `property` would still contain
# the incomplete rows.
property <- na.omit(property)
property
# NOTE(review): only values that fread() parsed as NA are removed. If the
# file marks missing entries with other tokens (e.g. "n/a" or "--"), pass
# them through fread()'s na.strings argument - confirm against the raw file.
## Challenge 3
# ---
# Question:
# ---
# Dataset url = http://bit.ly/AirQualityDataset
# ---
# OUR CODE GOES BELOW
#
# The short link has been observed to fail. The download is wrapped in
# tryCatch() so that a failure is reported as a message and `air` becomes
# NULL, instead of the error aborting the script before the local copy
# below is read.
air <- tryCatch(
  fread('http://bit.ly/AirQualityDataset'),
  error = function(e) {
    message(
      "Could not read http://bit.ly/AirQualityDataset: ",
      conditionMessage(e)
    )
    NULL
  }
)
head(air)
# Fall back to a local copy of the dataset.
# NOTE(review): this is a machine-specific absolute path - adjust it to
# wherever AirQualityUCI.csv lives on the machine running the script.
air2 <- fread('D:/R Stuff/R Stuff Moringa/Learning/Data/AirQualityUCI.csv')
air2
# The local file loads; count the missing values in each column
colSums(is.na(air2))
# Lots of missing values: fill the columns that are missing a lot.
#
# Two pitfalls can make `air2$col[is.na(air2$col)] <- value` look as if it
# had no effect:
#   * `$` extraction partially matches column names, but `$<-` assignment
#     does not. Writing through an abbreviated name therefore reads the real
#     column, then stores the result in a NEW column with the short name.
#   * The mean of a column with no observed values is NaN, and is.na(NaN)
#     is TRUE, so the missing-value count never goes down.
#
# fill_missing() avoids both.
#   data   : a data frame / data.table.
#   column : column name; an exact match is preferred, otherwise a unique
#            prefix is accepted.
#   value  : a single replacement value, or a function (e.g. mean) that is
#            applied to the observed (non-NA) values to compute it.
# Returns `data` with the NAs of that column replaced. If the column cannot
# be resolved or no usable replacement exists, it warns and returns `data`
# unchanged.
fill_missing <- function(data, column, value) {
  columns <- as.character(names(data))
  target <- if (column %in% columns) {
    column
  } else {
    columns[startsWith(columns, column)]
  }
  if (length(target) != 1L) {
    warning(
      "fill_missing(): '", column, "' matches ", length(target),
      " columns; nothing changed",
      call. = FALSE
    )
    return(data)
  }

  filled <- data[[target]]
  gaps <- is.na(filled)
  if (is.function(value)) {
    # Summarise only the observed values; an all-NA column has none
    value <- if (all(gaps)) NA else value(filled[!gaps])
  }
  if (length(value) != 1L || is.na(value)) {
    warning(
      "fill_missing(): no usable replacement for column '", target,
      "' (no observed values to summarise?); left unchanged - ",
      "consider dropping the column",
      call. = FALSE
    )
    return(data)
  }

  filled[gaps] <- value
  data[[target]] <- filled
  data
}

# NOTE(review): the UCI description of this dataset says missing readings are
# tagged with -200 rather than left blank; if that applies to this file,
# those values need recoding to NA before any of the counts below are
# meaningful - confirm against the raw data.
air2 <- fill_missing(air2, "V17", mean)
colSums(is.na(air2))
air2 <- fill_missing(air2, "PT08.S1", 9)
colSums(is.na(air2))