Pre_processing

##Pre-processing Basic data pre-processing. First set the working directory, then read the data.

GRAPHICAL METHODS FOR IDENTIFYING OUTLIERS

Set up the plot area and create the histogram bars. The histogram is used to examine the data for the presence of outliers; it is an informal way of finding them.

# Histogram of the weightlbs column of cars. breaks = 30 requests roughly 30
# cells; both axis ranges are fixed explicitly.
with(
  cars,
  hist(
    weightlbs,
    main = "Histogram
of Car Weights",
    xlab = "Weight",
    ylab = "Counts",
    xlim = c(1500, 5000),
    ylim = c(0, 20),
    breaks = 30,
    col = "blue",
    border = "black"
  )
)

##Examining a scatter plot for outliers

# Scatter plot of mpg (y axis) against weightlbs (x axis), drawn as small
# filled blue points with fixed axis ranges.
with(
  cars,
  plot(
    x = weightlbs,
    y = mpg,
    main = "Scatterplot of MPG by Weight",
    xlab = "Weight",
    ylab = "MPG",
    xlim = c(1500, 5000),
    ylim = c(0, 50),
    type = "p",
    pch = 20,
    col = "blue"
  )
)

###Results - Scatter Plot: The majority of the data lies around the mean, and there are no specific outliers in the relationship between mpg and weight.

##Box plot: examining outliers using box plots

# Mileage grouped by number of cylinders.
boxplot(
  mpg ~ cylinders,
  data = cars,
  main = "Mileage Data",
  xlab = "Number of Cylinders",
  ylab = "Miles Per Gallon"
)

# Mileage grouped by brand; the value returned by boxplot() is kept in `box`.
box <- boxplot(
  mpg ~ brand,
  data = cars,
  main = "Mileage Data",
  xlab = "brand",
  ylab = "Miles Per Gallon"
)

##Box plot for mileage only

# Box plot of mpg on its own; the `out` component of the value returned by
# boxplot() (the outlier values) is stored in `box`.
box <- boxplot(
  cars$mpg,
  main = "Mileage Data",
  xlab = "box",
  ylab = "Miles Per Gallon"
)$out

##New data for outliers: creating a copy of the data that contains outliers

# Open an editor on `cars` so outlier values can be entered by hand; the edited
# copy is stored in `cars1`. edit() is meant for interactive use, so when the
# script is run non-interactively (e.g. Rscript or knitting) fall back to an
# unmodified copy of `cars` instead of stopping the whole script.
cars1 <- if (interactive()) edit(cars) else cars

##$out is used to get the outlier values. They can be seen in the environment.

# Box plot of mpg from the edited data; the `out` component of the value
# returned by boxplot() (the outlier values) is stored in `box1`.
box1 <- boxplot(
  cars1$mpg,
  main = "Mileage Data",
  xlab = "box",
  ylab = "Miles Per Gallon"
)$out

##To get only the outlier values without drawing the plot

# Compute the box-plot statistics without drawing anything and keep only the
# outlier values. FALSE is spelled out because `F` is an ordinary variable that
# can be reassigned, which would silently turn plotting back on.
box2 <- boxplot(cars1$mpg, plot = FALSE)$out

##Identifying the complete rows that contain an outlier value

# Keep every row of cars1 whose mpg equals one of the outlier values in box2.
# %in% never yields NA, so the logical mask can index the rows directly.
outliers_data <- cars1[cars1$mpg %in% box2, ]

# Show the selected rows.
print(outliers_data)

## [1] Sl..No      mpg         cylinders   cubicinches hp          weightlbs  
## [7] time.to.60  year        brand      
## <0 rows> (or 0-length row.names)

# Open cars1 in the data viewer. The viewer is only useful in an interactive
# session, so skip it when the script runs non-interactively.
if (interactive()) {
  View(cars1)
}

##Saving the outlier rows to a CSV file

# Write the outlier rows to "outliers_cars.csv" (a relative path, so the file
# lands in the current working directory).
write.csv(x = outliers_data, file = "outliers_cars.csv")

##Deleting the outliers data. Use a negated logical mask rather than "-which":
##when no outliers are found, which() returns integer(0) and
##cars1[-integer(0), ] would drop every row instead of keeping them all.

cars2 <- cars1[!(cars1$mpg %in% box2), ]

##View(cars2)

##data2 <- cars[-which(cars$mpg %in% box),]

Pre_processing_cars

Ninad

2022-07-30

GRAPHICAL METHODS FOR IDENTIFYING OUTLIERS