This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

plot(cars) #default plot of the cars data frame (not defined in this notebook; R's built-in dataset)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

#Setting up the working directory and importing the data

getwd() #current working directory, where "usedcars.csv" is looked up
## [1] "/Users/jakescicluna/Downloads"
usedcars <- read.csv(file = "usedcars.csv",
                     stringsAsFactors = FALSE) #keep text columns as character
str(object = usedcars) #compact overview of the columns and their types
## 'data.frame':    150 obs. of  6 variables:
##  $ year        : int  2011 2011 2011 2011 2012 2010 2011 2010 2011 2010 ...
##  $ model       : chr  "SEL" "SEL" "SEL" "SEL" ...
##  $ price       : int  21992 20995 19995 17809 17500 17495 17000 16995 16995 16995 ...
##  $ mileage     : int  7413 10926 7351 11613 8367 25125 27393 21026 32655 36116 ...
##  $ color       : chr  "Yellow" "Gray" "Silver" "Gray" ...
##  $ transmission: chr  "AUTO" "AUTO" "AUTO" "AUTO" ...

#Exploring numeric values

summary(usedcars[["year"]]) #min, quartiles, median, mean and max of year
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2000    2008    2009    2009    2010    2012
summary(usedcars[, c("price", "mileage")]) #same statistics for two columns at once
##      price          mileage      
##  Min.   : 3800   Min.   :  4867  
##  1st Qu.:10995   1st Qu.: 27200  
##  Median :13592   Median : 36385  
##  Mean   :12962   Mean   : 44261  
##  3rd Qu.:14904   3rd Qu.: 55124  
##  Max.   :21992   Max.   :151479
(36000 + 44000 + 56000) / 3 #mean income worked out by hand
## [1] 45333.33
mean(x = c(36000, 44000, 56000)) #same mean using the built-in function
## [1] 45333.33
median(x = c(36000, 44000, 56000)) #median income
## [1] 44000
range(usedcars[["price"]]) #lowest and highest car price
## [1]  3800 21992
diff(range(usedcars[["price"]])) #width of the price range
## [1] 18192
IQR(usedcars[["price"]]) #interquartile range
## [1] 3909.5
quantile(usedcars[["price"]]) #five-number summary
##      0%     25%     50%     75%    100% 
##  3800.0 10995.0 13591.5 14904.5 21992.0
quantile(usedcars[["price"]],
         probs = c(0.01, 0.99)) #1st and 99th percentiles
##       1%      99% 
##  5428.69 20505.00
quantile(usedcars[["price"]],
         probs = seq(from = 0, to = 1, by = 0.20)) #percentiles in steps of 20%
##      0%     20%     40%     60%     80%    100% 
##  3800.0 10759.4 12993.8 13992.0 14999.0 21992.0
boxplot(usedcars$price, #boxplot of car prices
        main = "Boxplot of Used Car Prices",
        ylab = "Price ($)")

boxplot(usedcars$mileage, #boxplot of odometer readings
        main = "Boxplot of Used Car Mileage",
        ylab = "Odometer (mi.)")

hist(usedcars$price, #histogram of car prices
     main = "Histogram of Used Car Prices",
     xlab = "Price ($)")

hist(usedcars$mileage, #histogram of odometer readings
     main = "Histogram of Used Car Mileage",
     xlab = "Odometer (mi.)")

var(usedcars[["price"]]) #variance of price
## [1] 9749892
sd(usedcars[["price"]]) #standard deviation of price
## [1] 3122.482
var(usedcars[["mileage"]]) #variance of mileage
## [1] 728033954
sd(usedcars[["mileage"]]) #standard deviation of mileage
## [1] 26982.1

#Exploring categorical variables

table(usedcars[["year"]]) #one-way table: number of cars per year
## 
## 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 
##    3    1    1    1    3    2    6   11   14   42   49   16    1
table(usedcars[["model"]]) #number of cars per model
## 
##  SE SEL SES 
##  78  23  49
table(usedcars[["color"]]) #number of cars per color
## 
##  Black   Blue   Gold   Gray  Green    Red Silver  White Yellow 
##     35     17      1     16      5     25     32     16      3
model_table <- table(usedcars[["model"]]) #counts per model
prop.table(x = model_table) #counts converted to proportions of the total
## 
##        SE       SEL       SES 
## 0.5200000 0.1533333 0.3266667
color_table <- table(usedcars[["color"]]) #counts per color
color_pct <- 100 * prop.table(x = color_table) #proportions as percentages
round(x = color_pct, digits = 1) #percentages rounded to one decimal place
## 
##  Black   Blue   Gold   Gray  Green    Red Silver  White Yellow 
##   23.3   11.3    0.7   10.7    3.3   16.7   21.3   10.7    2.0

#Exploring relationships between variables

plot(usedcars$mileage, usedcars$price, #mileage on the x axis, price on the y axis
     xlab = "Used Car Odometer (mi.)",
     ylab = "Used Car Price ($)",
     main = "Scatterplot of Price vs Mileage")

#new logical column: TRUE when the car's color is one of the conservative colors
usedcars[["conservative"]] <-
  usedcars[["color"]] %in% c("Black", "Gray", "Silver", "White")
table(usedcars[["conservative"]]) #how many cars fall on each side of the flag
## 
## FALSE  TRUE 
##    51    99
library(gmodels) #loaded for the CrossTable() call below
## Warning: package 'gmodels' was built under R version 4.3.3
#cross-tabulates car model (rows) against the conservative-color flag (columns);
#the recorded output labels the table with the argument expressions as written
#here (usedcars$model, usedcars$conservative), so keep them unchanged
CrossTable(x = usedcars$model, y = usedcars$conservative) #making a cross table 
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  150 
## 
##  
##                | usedcars$conservative 
## usedcars$model |     FALSE |      TRUE | Row Total | 
## ---------------|-----------|-----------|-----------|
##             SE |        27 |        51 |        78 | 
##                |     0.009 |     0.004 |           | 
##                |     0.346 |     0.654 |     0.520 | 
##                |     0.529 |     0.515 |           | 
##                |     0.180 |     0.340 |           | 
## ---------------|-----------|-----------|-----------|
##            SEL |         7 |        16 |        23 | 
##                |     0.086 |     0.044 |           | 
##                |     0.304 |     0.696 |     0.153 | 
##                |     0.137 |     0.162 |           | 
##                |     0.047 |     0.107 |           | 
## ---------------|-----------|-----------|-----------|
##            SES |        17 |        32 |        49 | 
##                |     0.007 |     0.004 |           | 
##                |     0.347 |     0.653 |     0.327 | 
##                |     0.333 |     0.323 |           | 
##                |     0.113 |     0.213 |           | 
## ---------------|-----------|-----------|-----------|
##   Column Total |        51 |        99 |       150 | 
##                |     0.340 |     0.660 |           | 
## ---------------|-----------|-----------|-----------|
## 
##