This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
# Report the working directory so the relative CSV path below can be checked.
getwd()
## [1] "/cloud/project"
# Load the used-car listings, keeping text columns as character vectors.
usedcars <- read.csv(file = "usedcars.csv", stringsAsFactors = FALSE)
# Show each column's type together with its first few values.
str(object = usedcars)
## 'data.frame': 150 obs. of 6 variables:
## $ year : int 2011 2011 2011 2011 2012 2010 2011 2010 2011 2010 ...
## $ model : chr "SEL" "SEL" "SEL" "SEL" ...
## $ price : int 21992 20995 19995 17809 17500 17495 17000 16995 16995 16995 ...
## $ mileage : int 7413 10926 7351 11613 8367 25125 27393 21026 32655 36116 ...
## $ color : chr "Yellow" "Gray" "Silver" "Gray" ...
## $ transmission: chr "AUTO" "AUTO" "AUTO" "AUTO" ...
# Quartiles, mean and extremes of the model year.
summary(usedcars[["year"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2000 2008 2009 2009 2010 2012
# color is a character column, so summary() reports only length, class and mode.
summary(usedcars[["color"]])
## Length Class Mode
## 150 character character
# View(usedcars) is left commented out so the notebook can be published;
# the data frame is printed instead.
usedcars
# Summarise the two numeric columns side by side.
summary(usedcars[, c("price", "mileage")])
## price mileage
## Min. : 3800 Min. : 4867
## 1st Qu.:10995 1st Qu.: 27200
## Median :13592 Median : 36385
## Mean :12962 Mean : 44261
## 3rd Qu.:14904 3rd Qu.: 55124
## Max. :21992 Max. :151479
# Average of three values worked out by hand ...
sum(36000, 44000, 56000) / 3
## [1] 45333.33
# ... and the same average obtained from mean().
mean(x = c(36000, 44000, 56000))
## [1] 45333.33
# Smallest and largest price.
range(usedcars[["price"]])
## [1] 3800 21992
# Oldest and newest model year, then the width of that span.
range(usedcars[["year"]])
## [1] 2000 2012
diff(range(usedcars[["year"]]))
## [1] 12
# Interquartile range of the odometer readings.
IQR(usedcars[["mileage"]])
## [1] 27924.25
# Default quantiles (min, quartiles, max) of the model year.
quantile(usedcars[["year"]])
## 0% 25% 50% 75% 100%
## 2000 2008 2009 2010 2012
# 1st and 99th percentiles of the model year.
quantile(usedcars[["year"]], probs = c(0.01, 0.99))
## 1% 99%
## 2000 2011
# Price quintiles: cut points every 20%.
quantile(usedcars[["price"]], probs = seq(0, 1, by = 0.2))
## 0% 20% 40% 60% 80% 100%
## 3800.0 10759.4 12993.8 13992.0 14999.0 21992.0
# Boxplot of model years.
boxplot(
  usedcars[["year"]],
  main = "Boxplot of Used Car Years",
  ylab = "Year (YYYY)"
)
# Boxplot of odometer readings.
boxplot(
  usedcars[["mileage"]],
  main = "Boxplot of Used Car Mileage",
  ylab = "Odometer (mi.)"
)
# Histogram of asking prices.
hist(
  usedcars[["price"]],
  main = "Histogram of Used Car Prices",
  xlab = "Price ($)"
)
# Sample variance of the model year ...
var(usedcars[["year"]])
## [1] 4.844251
# ... and its square root, the sample standard deviation.
sd(usedcars[["year"]])
## [1] 2.200966
# Count of cars per transmission type.
table(usedcars[["transmission"]])
##
## AUTO MANUAL
## 128 22
# Count of cars per model, then each model's share of the total.
model_table <- table(usedcars[["model"]])
prop.table(model_table)
##
## SE SEL SES
## 0.5200000 0.1533333 0.3266667
# Count of cars per colour, converted to percentages and shown to one decimal.
color_table <- table(usedcars[["color"]])
color_pct <- 100 * prop.table(color_table)
round(color_pct, 1)
##
## Black Blue Gold Gray Green Red Silver White Yellow
## 23.3 11.3 0.7 10.7 3.3 16.7 21.3 10.7 2.0
# Scatterplot of price against mileage. The title and x-axis label name the
# variable that is actually plotted on x (mileage, not color).
plot(
  x = usedcars$mileage, y = usedcars$price,
  main = "Scatterplot of Price vs. Mileage",
  xlab = "Used Car Odometer (mi.)",
  ylab = "Used Car Price ($)"
)
# Install gmodels only when it is missing, so re-running or knitting the
# notebook does not download and reinstall the package every time.
if (!requireNamespace("gmodels", quietly = TRUE)) {
  install.packages("gmodels")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
# Flag cars whose colour is one of Black, Gray, Silver or White.
usedcars[["conservative"]] <- is.element(
  usedcars[["color"]],
  c("Black", "Gray", "Silver", "White")
)
# Count of cars with and without the flag.
table(usedcars[["conservative"]])
##
## FALSE TRUE
## 51 99
# Attach gmodels, which supplies CrossTable().
library(gmodels)
# Cross-tabulate model (rows) against the conservative-colour flag (columns).
# The argument expressions are left exactly as written because CrossTable()
# prints them as the row and column headers of the table.
CrossTable(x = usedcars$model, y = usedcars$conservative)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 150
##
##
## | usedcars$conservative
## usedcars$model | FALSE | TRUE | Row Total |
## ---------------|-----------|-----------|-----------|
## SE | 27 | 51 | 78 |
## | 0.009 | 0.004 | |
## | 0.346 | 0.654 | 0.520 |
## | 0.529 | 0.515 | |
## | 0.180 | 0.340 | |
## ---------------|-----------|-----------|-----------|
## SEL | 7 | 16 | 23 |
## | 0.086 | 0.044 | |
## | 0.304 | 0.696 | 0.153 |
## | 0.137 | 0.162 | |
## | 0.047 | 0.107 | |
## ---------------|-----------|-----------|-----------|
## SES | 17 | 32 | 49 |
## | 0.007 | 0.004 | |
## | 0.347 | 0.653 | 0.327 |
## | 0.333 | 0.323 | |
## | 0.113 | 0.213 | |
## ---------------|-----------|-----------|-----------|
## Column Total | 51 | 99 | 150 |
## | 0.340 | 0.660 | |
## ---------------|-----------|-----------|-----------|
##
##