# Show the current working directory; the relative path "usedcars.csv" used
# by read.csv() below is resolved against it
getwd()
[1] "/cloud/project"
# Load the used car listings; stringsAsFactors = FALSE keeps the text columns
# (model, color, transmission) as character vectors instead of factors
usedcars <- read.csv("usedcars.csv", stringsAsFactors = FALSE)
# Inspect the structure: row/column counts plus each column's type and
# first few values
str(usedcars)
'data.frame':   150 obs. of  6 variables:
 $ year        : int  2011 2011 2011 2011 2012 2010 2011 2010 2011 2010 ...
 $ model       : chr  "SEL" "SEL" "SEL" "SEL" ...
 $ price       : int  21992 20995 19995 17809 17500 17495 17000 16995 16995 16995 ...
 $ mileage     : int  7413 10926 7351 11613 8367 25125 27393 21026 32655 36116 ...
 $ color       : chr  "Yellow" "Gray" "Silver" "Gray" ...
 $ transmission: chr  "AUTO" "AUTO" "AUTO" "AUTO" ...
# Print the whole data frame, then display its structure once more
usedcars
str(usedcars)
'data.frame':   150 obs. of  6 variables:
 $ year        : int  2011 2011 2011 2011 2012 2010 2011 2010 2011 2010 ...
 $ model       : chr  "SEL" "SEL" "SEL" "SEL" ...
 $ price       : int  21992 20995 19995 17809 17500 17495 17000 16995 16995 16995 ...
 $ mileage     : int  7413 10926 7351 11613 8367 25125 27393 21026 32655 36116 ...
 $ color       : chr  "Yellow" "Gray" "Silver" "Gray" ...
 $ transmission: chr  "AUTO" "AUTO" "AUTO" "AUTO" ...

##Exploring numeric variables —

# Summarize the model year: minimum, quartiles, median, mean and maximum
summary(usedcars$year)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   2000    2008    2009    2009    2010    2012 
# Summarize price and mileage side by side by selecting both columns at once
summary(usedcars[c("price","mileage")])
     price          mileage      
 Min.   : 3800   Min.   :  4867  
 1st Qu.:10995   1st Qu.: 27200  
 Median :13592   Median : 36385  
 Mean   :12962   Mean   : 44261  
 3rd Qu.:14904   3rd Qu.: 55124  
 Max.   :21992   Max.   :151479  
# Mean income by hand: add the three example incomes and divide by their count
(36000 + 44000 + 56000 ) / 3
[1] 45333.33
# Same mean computed with the built-in mean() on a vector of the three incomes
mean(c(36000,44000,56000))
[1] 45333.33
# Median income: the middle value of the three incomes
median(c(36000,44000,56000))
[1] 44000
# Minimum and maximum used car price, returned as a two-element vector
range(usedcars$price)
[1]  3800 21992
# Width of the price range: maximum price minus minimum price
diff(range(usedcars$price))
[1] 18192
# Interquartile range of used car prices (75th percentile minus 25th)
IQR(usedcars$price)
[1] 3909.5
# Five-number summary of price: with no probs argument, quantile() reports
# the 0%, 25%, 50%, 75% and 100% points
quantile(usedcars$price)
     0%     25%     50%     75%    100% 
 3800.0 10995.0 13591.5 14904.5 21992.0 
# Five-number summary of price again (repeats the previous quantile() call)
quantile(usedcars$price)
     0%     25%     50%     75%    100% 
 3800.0 10995.0 13591.5 14904.5 21992.0 
# The 1st and 99th percentiles of used car prices
quantile(usedcars$price, probs = c(0.01,0.99))
      1%      99% 
 5428.69 20505.00 
# Quintiles of price: cut points at every 20% from 0% to 100%
quantile(usedcars$price, seq(from=0, to=1, by=0.20))
     0%     20%     40%     60%     80%    100% 
 3800.0 10759.4 12993.8 13992.0 14999.0 21992.0 
# Box-and-whisker plots: one figure for price, one for mileage
boxplot(
  x = usedcars[["price"]],
  ylab = "Price ($)",
  main = "Boxplot of Used Car Prices"
)

boxplot(
  x = usedcars[["mileage"]],
  ylab = "Odometer (mi.)",
  main = "Boxplot of Used Car Mileage"
)

# Frequency histograms: one figure for price, one for mileage
hist(
  x = usedcars[["price"]],
  xlab = "Price ($)",
  main = "Histogram of Used Car Prices"
)

hist(
  x = usedcars[["mileage"]],
  xlab = "Odometer (mi.)",
  main = "Histogram of Used Car Mileage"
)

# Spread of the used car data, starting with the variance of price
var(usedcars$price)
[1] 9749892
# Standard deviation of price (the square root of the variance above)
sd(usedcars$price)
[1] 3122.482
# Variance of mileage
var(usedcars$mileage)
[1] 728033954
# Standard deviation of mileage (the square root of the variance above)
sd(usedcars$mileage)
[1] 26982.1

##Exploring categorical variables —

# One-way frequency table: how many cars there are for each model year
table(usedcars$year)

2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 
   3    1    1    1    3    2    6   11   14   42   49   16    1 
# Number of cars for each value of model
table(usedcars$model)

 SE SEL SES 
 78  23  49 
# Number of cars for each color
table(usedcars$color)

 Black   Blue   Gold   Gray  Green    Red Silver  White Yellow 
    35     17      1     16      5     25     32     16      3 
# Turn the per-model counts into proportions of all cars
model_table <- table(usedcars$model)
prop.table(model_table)

       SE       SEL       SES 
0.5200000 0.1533333 0.3266667 
# Share of each color as a percentage of all cars, rounded to one decimal place
color_table <- table(usedcars$color)
color_pct <- prop.table(color_table) * 100
round(color_pct,digits = 1)

 Black   Blue   Gold   Gray  Green    Red Silver  White Yellow 
  23.3   11.3    0.7   10.7    3.3   16.7   21.3   10.7    2.0 

##Exploring relationships between variables —

# Scatterplot with mileage on the horizontal axis and price on the vertical axis
with(
  usedcars,
  plot(
    mileage,
    price,
    xlab = "Used Car Odometer (mi.)",
    ylab = "Used Car Price ($)",
    main = "Scatterplot of Price vs. Mileage"
  )
)

# Add a logical column that is TRUE when the car's color is one of the
# conservative colors (black, gray, silver or white)
usedcars[["conservative"]] <- is.element(
  usedcars[["color"]],
  c("Black", "Gray", "Silver", "White")
)
# Sanity check on the new column: count the cars flagged FALSE and TRUE
table(usedcars$conservative)

FALSE  TRUE 
   51    99 
# Install gmodels only when it is not already available, so re-running the
# notebook does not download and rebuild the package every time
if (!requireNamespace("gmodels", quietly = TRUE)) {
  install.packages("gmodels")
}
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/gtools_3.9.5.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/gdata_3.0.1.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/gmodels_2.19.1.tar.gz'

The downloaded source packages are in
    ‘/tmp/RtmpjiN2vb/downloaded_packages’
# Cross-tabulate model (rows) against the conservative-color flag (columns).
# CrossTable() comes from the gmodels package attached on the next line.
library(gmodels)
CrossTable(x = usedcars$model, y = usedcars$conservative)

 
   Cell Contents
|-------------------------|
|                       N |
| Chi-square contribution |
|           N / Row Total |
|           N / Col Total |
|         N / Table Total |
|-------------------------|

 
Total Observations in Table:  150 

 
               | usedcars$conservative 
usedcars$model |     FALSE |      TRUE | Row Total | 
---------------|-----------|-----------|-----------|
            SE |        27 |        51 |        78 | 
               |     0.009 |     0.004 |           | 
               |     0.346 |     0.654 |     0.520 | 
               |     0.529 |     0.515 |           | 
               |     0.180 |     0.340 |           | 
---------------|-----------|-----------|-----------|
           SEL |         7 |        16 |        23 | 
               |     0.086 |     0.044 |           | 
               |     0.304 |     0.696 |     0.153 | 
               |     0.137 |     0.162 |           | 
               |     0.047 |     0.107 |           | 
---------------|-----------|-----------|-----------|
           SES |        17 |        32 |        49 | 
               |     0.007 |     0.004 |           | 
               |     0.347 |     0.653 |     0.327 | 
               |     0.333 |     0.323 |           | 
               |     0.113 |     0.213 |           | 
---------------|-----------|-----------|-----------|
  Column Total |        51 |        99 |       150 | 
               |     0.340 |     0.660 |           | 
---------------|-----------|-----------|-----------|

 
LS0tCnRpdGxlOiAiQWN0aXZpdHkgNDogRXhwbG9yaW5nICYgVW5kZXJzdGFuZGluZyBEYXRhIHdpdGggUiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCgpgYGB7cn0KZ2V0d2QoKQpgYGAKCmBgYHtyfQp1c2VkY2FycyA8LSByZWFkLmNzdigidXNlZGNhcnMuY3N2Iiwgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKQpgYGAKCmBgYHtyfQojR2V0IHN0cnVjdHVyZSBvZiB1c2VkIGNhciBkYXRhCnN0cih1c2VkY2FycykKYGBgCgoKYGBge3J9CnVzZWRjYXJzCmBgYAoKYGBge3J9CnN0cih1c2VkY2FycykKYGBgCgoKIyNFeHBsb3JpbmcgbnVtZXJpYyB2YXJpYWJsZXMgLS0tCgoKYGBge3J9CiMgU3VtbWFyaXplIG51bWVyaWMgdmFyaWFibGVzCnN1bW1hcnkodXNlZGNhcnMkeWVhcikKYGBgCgpgYGB7cn0Kc3VtbWFyeSh1c2VkY2Fyc1tjKCJwcmljZSIsIm1pbGVhZ2UiKV0pCmBgYAoKYGBge3J9CiNDYWxjdWxhdGUgdGhlIG1lYW4gaW5jb21lCigzNjAwMCArIDQ0MDAwICsgNTYwMDAgKSAvIDMKYGBgCgpgYGB7cn0KI01lYW4KbWVhbihjKDM2MDAwLDQ0MDAwLDU2MDAwKSkKYGBgCgpgYGB7cn0KI1RoZSBtZWRpYW4gaW5jb21lCm1lZGlhbihjKDM2MDAwLDQ0MDAwLDU2MDAwKSkKYGBgCgpgYGB7cn0KI1RoZSBtaW4vbWF4IG9mIHVzZWQgY2FyIHByaWNlcwpyYW5nZSh1c2VkY2FycyRwcmljZSkKYGBgCgpgYGB7cn0KI1RoZSBkaWZmZXJlbmNlIG9mIHRoZSByYW5nZQpkaWZmKHJhbmdlKHVzZWRjYXJzJHByaWNlKSkKYGBgCgpgYGB7cn0KI0lRUiBmb3IgdXNlZCBjYXIgcHJpY2VzCklRUih1c2VkY2FycyRwcmljZSkKYGBgCgpgYGB7cn0KI1VzZSBxdWFudGlsZSB0byBjYWxjdWxhdGUgZml2ZS1udW1iZXIgc3VtbWFyeQpxdWFudGlsZSh1c2VkY2FycyRwcmljZSkKYGBgCgpgYGB7cn0KI1VzZSBxdWFudGlsZSB0byBjYWxjdWxhdGUgNSBudW1iZXIgc3VtbWFyeQpxdWFudGlsZSh1c2VkY2FycyRwcmljZSkKYGBgCgpgYGB7cn0KI1RoZSA5OXRoIHBlcmNlbnRpbGUKcXVhbnRpbGUodXNlZGNhcnMkcHJpY2UsIHByb2JzID0gYygwLjAxLDAuOTkpKQpgYGAKCmBgYHtyfQojUXVpbnRpbGVzCnF1YW50aWxlKHVzZWRjYXJzJHByaWNlLCBzZXEoZnJvbT0wLCB0bz0xLCBieT0wLjIwKSkKYGBgCgpgYGB7cn0KI0JveHBsb3Qgb2YgdXNlZCBjYXIgcHJpY2VzIGFuZCBtaWxlYWdlCmJveHBsb3QodXNlZGNhcnMkcHJpY2UsIG1haW49IkJveHBsb3Qgb2YgVXNlZCBDYXIgUHJpY2VzIiwKICAgICAgICB5bGFiPSJQcmljZSAoJCkiKQpgYGAKCgpgYGB7cn0KYm94cGxvdCh1c2VkY2FycyRtaWxlYWdlLCBtYWluPSJCb3hwbG90IG9mIFVzZWQgQ2FyIE1pbGVhZ2UiLAogICAgICAgIHlsYWI9Ik9kb21ldGVyIChtaS4pIikKYGBgCgpgYGB7cn0KI0hpc3RvZ3JhbXMgb2YgdXNlZCBjYXIgcHJpY2VzIGFuZCBtaWxlYWdlCmhpc3QodXNlZGNhcnMkcHJpY2UsIG1haW49Ikhpc3RvZ3JhbSBvZiBVc2VkIENhciBQcmljZXMiLAogICAgIHhsYWI9IlByaWNl
ICgkKSIpCmBgYAoKYGBge3J9Cmhpc3QodXNlZGNhcnMkbWlsZWFnZSwgbWFpbiA9ICJIaXN0b2dyYW0gb2YgVXNlZCBDYXIgTWlsZWFnZSIsCiAgICAgeGxhYiA9ICJPZG9tZXRlciAobWkuKSIpCmBgYAoKYGBge3J9CiNWYXJpYW5jZSBhbmQgc3RhbmRhcmQgZGV2aWF0aW9uIG9mIHRoZSB1c2VkIGNhciBkYXRhCnZhcih1c2VkY2FycyRwcmljZSkKYGBgCgpgYGB7cn0Kc2QodXNlZGNhcnMkcHJpY2UpCmBgYAoKYGBge3J9CnZhcih1c2VkY2FycyRtaWxlYWdlKQpgYGAKCmBgYHtyfQpzZCh1c2VkY2FycyRtaWxlYWdlKQpgYGAKCgojI0V4cGxvcmluZyBOdW1lcmljIFZhcmlhYmxlcyAtLS0KCgpgYGB7cn0KI09uZS13YXkgdGFibGVzIGZvciB0aGUgdXNlZCBjYXIgZGF0YQp0YWJsZSh1c2VkY2FycyR5ZWFyKQpgYGAKCmBgYHtyfQp0YWJsZSh1c2VkY2FycyRtb2RlbCkKYGBgCgpgYGB7cn0KdGFibGUodXNlZGNhcnMkY29sb3IpCmBgYAoKYGBge3J9CiNDb21wdXRlIHRhYmxlIHByb3BvcnRpb25zCm1vZGVsX3RhYmxlIDwtIHRhYmxlKHVzZWRjYXJzJG1vZGVsKQpwcm9wLnRhYmxlKG1vZGVsX3RhYmxlKQpgYGAKCmBgYHtyfQojUm91bmQgdGhlIGRhdGEKY29sb3JfdGFibGUgPC0gdGFibGUodXNlZGNhcnMkY29sb3IpCmNvbG9yX3BjdCA8LSBwcm9wLnRhYmxlKGNvbG9yX3RhYmxlKSAqIDEwMApyb3VuZChjb2xvcl9wY3QsZGlnaXRzID0gMSkKYGBgCgoKIyNFeHBsb3JpbmcgcmVsYXRpb25zaGlwcyBiZXR3ZWVuIHZhcmlhYmxlcyAtLS0KCgpgYGB7cn0KI1NjYXR0ZXJwbG90IG9mIHByaWNlIHZzLiBtaWxlYWdlCnBsb3QoeCA9IHVzZWRjYXJzJG1pbGVhZ2UsIHkgPSB1c2VkY2FycyRwcmljZSwKICAgICBtYWluID0gIlNjYXR0ZXJwbG90IG9mIFByaWNlIHZzLiBNaWxlYWdlIiwKICAgICB4bGFiID0gIlVzZWQgQ2FyIE9kb21ldGVyIChtaS4pIiwKICAgICB5bGFiID0gIlVzZWQgQ2FyIFByaWNlICgkKSIpCmBgYAoKYGBge3J9CiNOZXcgVmFyaWFibGUgSW5kaWNhdGluZyBDb25zZXJ2YXRpdmUgQ29sb3JzCnVzZWRjYXJzJGNvbnNlcnZhdGl2ZSA8LQogIHVzZWRjYXJzJGNvbG9yICVpbiUgYygiQmxhY2siLCJHcmF5IiwiU2lsdmVyIiwiV2hpdGUiKQpgYGAKCmBgYHtyfQojQ2hlY2tpbmcgb3VyIFZhcmlhYmxlCnRhYmxlKHVzZWRjYXJzJGNvbnNlcnZhdGl2ZSkKYGBgCgpgYGB7cn0KaW5zdGFsbC5wYWNrYWdlcygiZ21vZGVscyIpCmBgYAoKYGBge3J9CiNDcm9zc3RhYiBvZiBjb25zZXJ2YXRpdmUgYnkgbW9kZWwKbGlicmFyeShnbW9kZWxzKQpgYGAKCmBgYHtyfQpDcm9zc1RhYmxlKHggPSB1c2VkY2FycyRtb2RlbCwgeSA9IHVzZWRjYXJzJGNvbnNlcnZhdGl2ZSkKYGBgCg==