Execute the following cell to load the tidyverse library:

library(tidyverse)

Execute the following cell to load the data. Refer to this website http://archive.ics.uci.edu/ml/datasets/Auto+MPG for details on the dataset:

# Fetch the raw Auto MPG table from the UCI repository.
#   quote = "\""      -> only double quotes delimit strings, so an apostrophe
#                        inside a field is not treated as a quote character
#   comment.char = "" -> comment handling is switched off
#   stringsAsFactors  -> text columns stay character instead of becoming factors
autompg <- read.table(
  file = "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
  quote = "\"",
  comment.char = "",
  stringsAsFactors = FALSE
)

# Preview the first 20 rows of the freshly loaded table.
head(autompg, n = 20)

Task 1: print the structure of the unedited data set. How many samples and features are there?

# Task 1: inspect the raw table exactly as loaded, before any cleaning.
str(autompg)

# Report the dimensions from the data frame itself instead of typing the
# numbers in, so the sentence cannot drift out of sync with the data.
# Rows are the samples and columns are the features.
print(sprintf(
  "There are %d samples and %d features.",
  nrow(autompg),
  ncol(autompg)
))

Execute the following cell to assign names to the columns of the dataframe:

# Give the nine columns descriptive names, in the order they appear in the
# file.
names(autompg) <- c(
  "mpg", "cyl", "disp", "hp", "wt", "acc", "year", "origin", "name"
)

Task-2: complete the code segment below to remove samples with missing horsepower (hp) values represented as a “?” in the dataset.

# Task 2: missing horsepower is encoded as the literal string "?", so keep
# only the rows whose hp differs from that marker.
autompg <- filter(autompg, hp != "?")
print(autompg)

Task-3: complete the code segment below to remove samples with the name “plymouth reliant”

# Task 3: discard every row whose name is exactly "plymouth reliant".
autompg <- filter(autompg, name != "plymouth reliant")
print(autompg)

Task-4: complete the code segment below to select all features except ‘name’

# Task 4: keep every feature except the 'name' column.
autompg <- select(autompg, -name)

# Auto-print the reduced data frame.
autompg

Execute the following cell to change the type of hp values from character to numeric:

# hp is held as character up to this point; convert it to numeric. Any value
# that is not a valid number would become NA (with a warning).
autompg[["hp"]] <- as.numeric(autompg[["hp"]])

Execute the following code cell to modify the ‘origin’ column so that it labels each model as ‘local’ or ‘international’ (origin codes 2 and 3 become ‘international’; every other code becomes ‘local’):

# Replace the numeric origin code with a text label: codes 2 and 3 become
# "international", every other value becomes "local".
autompg <- mutate(
  autompg,
  origin = ifelse(origin %in% c(2, 3), "international", "local")
)

# Preview the first 20 rows to confirm the new labels.
head(autompg, n = 20)

Task 5: print the structure of the dataframe. What types are the columns ‘cyl’ and ‘origin’?

# Task 5: inspect the structure of the cleaned data frame.
str(autompg)

# Read the column types from the data instead of typing them in, so the
# sentence always reports what the data frame actually holds.
print(sprintf(
  "'cyl' is %s type and 'origin' is %s type.",
  class(autompg$cyl),
  class(autompg$origin)
))

Task-6: complete the code segment below to change the types of ‘cyl’ and ‘origin’ columns to factor

# Task 6: the columns that should be treated as categorical.
catcols <- c("cyl", "origin")

# Convert each listed column to a factor; all other columns are untouched.
autompg <- autompg %>%
  mutate(across(all_of(catcols), as.factor))

# Confirm the new column types.
str(autompg)

Task-7: complete the code segment below to create a scatter plot of mpg vs. displacement, color-coding the points according to the origin (local or international). Comment on what you observe:

# Task 7: scatter plot of mpg vs. displacement, coloured by origin.
# "mpg vs. displacement" puts mpg on the y-axis and displacement on the x-axis.
# origin is mapped directly (no factor() wrapper) so the legend title is not
# the raw expression; explicit labels name both axes and the legend.
p <- ggplot(data = autompg, aes(x = disp, y = mpg, color = origin)) +
  geom_point(size = 1.8) +
  labs(x = "Displacement (disp)", y = "Fuel economy (mpg)", color = "Origin")
p
# Observation: mpg and displacement move in opposite directions - cars with
# larger displacement have lower mpg. International cars have more mileage
# than local cars. Local cars have more displacement, and their mileage rises
# as displacement falls.
LS0tDQp0aXRsZTogIkFjdGl2aXR5IChHcmFkZWQpIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KRXhlY3V0ZSB0aGUgZm9sbG93aW5nIGNlbGwgdG8gbG9hZCB0aGUgdGlkeXZlcnNlIGxpYnJhcnk6DQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KYGBgDQpFeGVjdXRlIHRoZSBmb2xsb3dpbmcgY2VsbCB0byBsb2FkIHRoZSBkYXRhLiBSZWZlciB0byB0aGlzIHdlYnNpdGUgaHR0cDovL2FyY2hpdmUuaWNzLnVjaS5lZHUvbWwvZGF0YXNldHMvQXV0bytNUEcNCmZvciBkZXRhaWxzIG9uIHRoZSBkYXRhc2V0Og0KYGBge3J9DQphdXRvbXBnID0gcmVhZC50YWJsZSgNCiAgImh0dHA6Ly9hcmNoaXZlLmljcy51Y2kuZWR1L21sL21hY2hpbmUtbGVhcm5pbmctZGF0YWJhc2VzL2F1dG8tbXBnL2F1dG8tbXBnLmRhdGEiLA0KICBxdW90ZSA9ICJcIiIsDQogIGNvbW1lbnQuY2hhciA9ICIiLA0KICBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpDQpoZWFkKGF1dG9tcGcsMjApDQpgYGANCioqVGFzayAxKio6IHByaW50IHRoZSBzdHJ1Y3R1cmUgb2YgdGhlIHVuZWRpdGVkIGRhdGEgc2V0LiBIb3cgbWFueSBzYW1wbGVzIGFuZCBmZWF0dXJlcyBhcmUgdGhlcmU/DQpgYGB7cn0NCnN0cihhdXRvbXBnKQ0KcHJpbnQoIlRoZXJlIGFyZSAzOTggc2FtcGxlcyBhbmQgOSBmZWF0dXJlcy4iKSANCmBgYA0KDQpFeGVjdXRlIHRoZSBmb2xsb3dpbmcgY2VsbCB0byBhc3NpZ24gbmFtZXMgdG8gdGhlIGNvbHVtbnMgb2YgdGhlIGRhdGFmcmFtZToNCmBgYHtyfQ0KY29sbmFtZXMoYXV0b21wZykgPSBjKCJtcGciLCAiY3lsIiwgImRpc3AiLCAiaHAiLCAid3QiLCAiYWNjIiwgInllYXIiLCAib3JpZ2luIiwgIm5hbWUiKQ0KYGBgDQoqKlRhc2stMioqOiBjb21wbGV0ZSB0aGUgY29kZSBzZWdtZW50IGJlbG93IHRvIHJlbW92ZSBzYW1wbGVzIHdpdGggbWlzc2luZyBob3JzZXBvd2VyIChocCkgdmFsdWVzIHJlcHJlc2VudGVkIGFzIGEgIj8iIGluIHRoZSBkYXRhc2V0Lg0KYGBge3J9DQphdXRvbXBnID0gYXV0b21wZyAlPiUgZmlsdGVyKGhwICE9ICc/JykNCnByaW50KGF1dG9tcGcpDQpgYGANCg0KKipUYXNrLTMqKjogY29tcGxldGUgdGhlIGNvZGUgc2VnbWVudCBiZWxvdyB0byByZW1vdmUgc2FtcGxlcyB3aXRoIHRoZSBuYW1lICJwbHltb3V0aCByZWxpYW50Ig0KYGBge3J9DQphdXRvbXBnID0gYXV0b21wZyAlPiUgZmlsdGVyKG5hbWUgIT0gInBseW1vdXRoIHJlbGlhbnQiKQ0KcHJpbnQoYXV0b21wZykNCmBgYA0KKipUYXNrLTQqKjogY29tcGxldGUgdGhlIGNvZGUgc2VnbWVudCBiZWxvdyB0byBzZWxlY3QgYWxsIGZlYXR1cmVzIGV4Y2VwdCAnbmFtZScNCmBgYHtyfQ0KYXV0b21wZyA9IGF1dG9tcGcgJT4lIHNlbGVjdCgtIm5hbWUiKQ0KKGF1dG9tcGcpDQpgYGANCkV4ZWN1dGUgdGhlIGZvbGxvd2luZyBjZWxsIHRvIGNoYW5nZSB0aGUgdHlwZSBvZiBocCB2YWx1ZXMgZnJvbSBjaGFyYWN0ZXIgdG8gbnVtZXJpYzoNCmBgYHtyfQ0KYXV0
b21wZyRocCA9IGFzLm51bWVyaWMoYXV0b21wZyRocCkNCmBgYA0KRXhlY3V0ZSB0aGUgZm9sbG93aW5nIGNvZGUgY2VsbCB0byBtb2RpZnkgJ29yaWdpbicgY29sdW1uIHRvIHJlZmxlY3QgbG9jYWwgKDEpIGFuZCBpbnRlcm5hdGlvbmFsIG1vZGVscyAoMCkNCmBgYHtyfQ0KYXV0b21wZyA9IGF1dG9tcGcgJT4lIG11dGF0ZShvcmlnaW4gPSBpZmVsc2UoIShvcmlnaW4gJWluJSBjKDIsIDMpKSwgJ2xvY2FsJywgJ2ludGVybmF0aW9uYWwnKSkNCmhlYWQoYXV0b21wZywgMjApDQpgYGANCioqVGFzayA1Kio6IHByaW50IHRoZSBzdHJ1Y3R1cmUgb2YgdGhlIGRhdGFmcmFtZS4gV2hhdCB0eXBlcyBhcmUgdGhlIGNvbHVtbnMgJ2N5bCcgYW5kICdvcmlnaW4nPw0KYGBge3J9DQpzdHIoYXV0b21wZykNCnByaW50KCInY3lsJyBpcyBpbnRlZ2VyIHR5cGUgYW5kICdvcmlnaW4nIGlzIGNoYXJhY3RlciB0eXBlLiIpDQpgYGANCioqVGFzay02Kio6IGNvbXBsZXRlIHRoZSBjb2RlIHNlZ21lbnQgYmVsb3cgdG8gY2hhbmdlIHRoZSB0eXBlcyBvZiAnY3lsJyBhbmQgJ29yaWdpbicgY29sdW1ucyB0byBmYWN0b3INCmBgYHtyfQ0KY2F0Y29scyA9IGMoJ2N5bCcsICdvcmlnaW4nKQ0KYXV0b21wZ1tjYXRjb2xzXSA9IGxhcHBseShhdXRvbXBnW2NhdGNvbHNdLCBhcy5mYWN0b3IpDQpzdHIoYXV0b21wZykNCmBgYA0KKipUYXNrLTcqKjogY29tcGxldGUgdGhlIGNvZGUgc2VnbWVudCBiZWxvdyB0byBjcmVhdGUgYSBzY2F0dGVyIHBsb3Qgb2YgbXBnIHZzLiBkaXNwbGFjZW1lbnQgYnkgY29sb3IgY29kaW5nIHRoZSBwb2ludHMgYWNjb3JkaW5nIHRvIHRoZSBvcmlnaW4gKGxvY2FsIG9yIGludGVybmF0aW9uYWwpLCBDb21tZW50IG9uIHdoYXQgeW91IG9ic2VydmU6DQpgYGB7cn0NCnAgPSBnZ3Bsb3QoZGF0YSA9YXV0b21wZyAsIGFlcyh4ID0gbXBnICwgeSA9IGRpc3AgLCBjb2xvciA9IGZhY3RvcihvcmlnaW4pKSkgKw0KICBnZW9tX3BvaW50KHNpemUgPSAxLjgpDQpwDQojIEFzIG1wZyBpbmNyZWFzZXMsIGRpc3BsYWNlbWVudCBkZWNyZWFzZXMuIEludGVybmF0aW9uYWwgY2FycyBoYXZlIG1vcmUgbWlsZWFnZSB0aGFuIGxvY2FsIGNhcnMuIExvY2FsIGNhcnMgaGF2ZSBtb3JlIGRpc3BsYWNlbWVudCB3aGljaCBrZWVwcyBkZWNyZWFzaW5nIGFzIHRoZSBtaWxlYWdlIGluY3JlYXNlcy4NCmBgYA==