# Clear the console by emitting the form-feed character ("\014").
cat("\014")

# Packages this analysis depends on: the data set (fueleconomy), the
# descriptive-statistics helpers (psych) and the plotting functions (ggplot2).
required_packages <- c("fueleconomy", "psych", "ggplot2")

# Install each package only if it is missing, then attach it.
# requireNamespace() checks availability without attaching anything, and
# library() stops with an error if the package still cannot be loaded, so a
# failed installation is reported here instead of surfacing later as a
# confusing "could not find function" error.
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

# Copy the `vehicles` data set shipped with fueleconomy into a plain
# data frame and preview its first rows.
vehiclesDF <- as.data.frame(fueleconomy::vehicles)
head(vehiclesDF)
You should phrase your research question in a way that matches up with the scope of inference your dataset allows for.
The following research questions will be analyzed
What are the cases, and how many are there?
Each case represents a vehicle type and its specification. There are 33,442 observations in the given data set.
Describe the method of data collection.
The data was retrieved from [Hadley Wickham’s RStudio Blog](https://blog.rstudio.com/2014/07/23/new-data-packages/), which is one of the approved data sources. The data is available in the R package “fueleconomy”.
What type of study is this (observational/experiment)?
This is an observational study.
If you collected the data, state self-collected. If not, provide a citation/link.
The data was collected by Hadley Wickham and is available online here: https://blog.rstudio.com/2014/07/23/new-data-packages/. For this project, the data was extracted using the fueleconomy R package.
What is the response variable, and what type is it (numerical/categorical)?
The response variable is the degree of fuel efficiency impact among different vehicle types, and it is numerical.
What is the explanatory variable, and what type is it (numerical/categorical)?
The explanatory variable is fuel efficiency and is numerical.
Provide summary statistics relevant to your research question. For example, if you’re comparing means across groups provide means, SDs, sample sizes of each group. This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed.
# Overall descriptive statistics (n, mean, sd, median, skew, ...) for `hwy`.
psych::describe(vehiclesDF[["hwy"]])
##    vars     n  mean   sd median trimmed  mad min max range skew kurtosis
## X1    1 33442 23.55 6.21     23   23.18 5.93   9 109   100 2.15    19.61
##      se
## X1 0.03
# Overall descriptive statistics (n, mean, sd, median, skew, ...) for `cty`.
psych::describe(vehiclesDF[["cty"]])
##    vars     n  mean   sd median trimmed  mad min max range skew kurtosis
## X1    1 33442 17.49 5.58     17      17 2.97   6 138   132 6.18       96
##      se
## X1 0.03
# Number of observations per vehicle class; NA gets its own cell only if
# missing values are actually present (useNA = "ifany").
table(vehiclesDF[["class"]], useNA = "ifany")
##
## Compact Cars Large Cars
## 4739 1533
## Midsize-Large Station Wagons Midsize Cars
## 627 3621
## Midsize Station Wagons Minicompact Cars
## 415 1080
## Minivan - 2WD Minivan - 4WD
## 308 44
## Small Pickup Trucks Small Pickup Trucks 2WD
## 538 392
## Small Pickup Trucks 4WD Small Sport Utility Vehicle 2WD
## 181 169
## Small Sport Utility Vehicle 4WD Small Station Wagons
## 213 1295
## Special Purpose Vehicle Special Purpose Vehicle 2WD
## 1 553
## Special Purpose Vehicle 4WD Special Purpose Vehicles
## 289 1453
## Special Purpose Vehicles/2wd Special Purpose Vehicles/4wd
## 2 2
## Sport Utility Vehicle - 2WD Sport Utility Vehicle - 4WD
## 1626 2091
## Standard Pickup Trucks Standard Pickup Trucks 2WD
## 2354 1106
## Standard Pickup Trucks 4WD Standard Pickup Trucks/2wd
## 910 4
## Standard Sport Utility Vehicle 2WD Standard Sport Utility Vehicle 4WD
## 76 171
## Subcompact Cars Two Seaters
## 4185 1602
## Vans Vans Passenger
## 1141 2
## Vans, Cargo Type Vans, Passenger Type
## 434 285
# Descriptive statistics of `hwy` computed separately for each vehicle class.
# mat = TRUE returns a single matrix-style table (one row per class) rather
# than a list with one element per group.
psych::describeBy(
  vehiclesDF[["hwy"]],
  group = vehiclesDF[["class"]],
  mat = TRUE
)
## item group1 vars n mean sd
## X11 1 Compact Cars 1 4739 27.80629 5.3742223
## X12 2 Large Cars 1 1533 23.79713 5.8587545
## X13 3 Midsize-Large Station Wagons 1 627 24.06539 2.8007071
## X14 4 Midsize Cars 1 3621 25.87793 5.7437943
## X15 5 Midsize Station Wagons 1 415 25.23373 3.2445896
## X16 6 Minicompact Cars 1 1080 25.53611 6.6785446
## X17 7 Minivan - 2WD 1 308 23.26948 2.1160417
## X18 8 Minivan - 4WD 1 44 21.40909 0.6220066
## X19 9 Small Pickup Trucks 1 538 23.25836 2.9207686
## X110 10 Small Pickup Trucks 2WD 1 392 23.33163 3.3784664
## X111 11 Small Pickup Trucks 4WD 1 181 20.49724 2.0128043
## X112 12 Small Sport Utility Vehicle 2WD 1 169 28.63905 7.0613731
## X113 13 Small Sport Utility Vehicle 4WD 1 213 25.70892 3.2577578
## X114 14 Small Station Wagons 1 1295 28.03320 5.0950450
## X115 15 Special Purpose Vehicle 1 1 24.00000 NA
## X116 16 Special Purpose Vehicle 2WD 1 553 20.35443 4.7747319
## X117 17 Special Purpose Vehicle 4WD 1 289 19.01038 3.6633360
## X118 18 Special Purpose Vehicles 1 1453 19.19270 3.6629749
## X119 19 Special Purpose Vehicles/2wd 1 2 21.50000 2.1213203
## X120 20 Special Purpose Vehicles/4wd 1 2 19.00000 0.0000000
## X121 21 Sport Utility Vehicle - 2WD 1 1626 22.43050 4.6189341
## X122 22 Sport Utility Vehicle - 4WD 1 2091 20.34099 3.2685157
## X123 23 Standard Pickup Trucks 1 2354 17.67077 2.9237257
## X124 24 Standard Pickup Trucks 2WD 1 1106 19.49819 3.5966923
## X125 25 Standard Pickup Trucks 4WD 1 910 17.80879 2.3033743
## X126 26 Standard Pickup Trucks/2wd 1 4 15.50000 1.7320508
## X127 27 Standard Sport Utility Vehicle 2WD 1 76 21.92105 2.9383132
## X128 28 Standard Sport Utility Vehicle 4WD 1 171 21.29240 3.3862873
## X129 29 Subcompact Cars 1 4185 27.04946 6.1803821
## X130 30 Two Seaters 1 1602 24.11548 7.7441778
## X131 31 Vans 1 1141 17.52585 3.0771494
## X132 32 Vans Passenger 1 2 17.00000 4.2426407
## X133 33 Vans, Cargo Type 1 434 16.98157 1.9425065
## X134 34 Vans, Passenger Type 1 285 16.44211 1.8176509
## median trimmed mad min max range skew kurtosis
## X11 28.0 27.54759 4.4478 9 99 90 1.864748658 19.70937325
## X12 24.0 23.63325 2.9652 9 97 88 6.487652731 76.60116407
## X13 24.0 23.94632 2.9652 16 42 26 1.042486462 4.45450840
## X14 25.0 25.56127 2.9652 9 102 93 3.125495438 34.58746436
## X15 25.0 25.09910 1.4826 16 45 29 1.252870742 6.45447188
## X16 24.5 24.94213 3.7065 13 108 95 5.472344616 60.55085653
## X17 23.0 23.21774 1.4826 17 33 16 0.400123259 3.39041458
## X18 21.0 21.30556 0.0000 21 23 2 1.187151768 0.26336372
## X19 23.0 23.01620 1.4826 16 35 19 0.958653423 1.91140748
## X110 23.0 22.95860 2.9652 18 58 40 3.274720396 27.43845398
## X111 20.0 20.43448 1.4826 15 28 13 0.536534001 1.00757965
## X112 28.0 27.68613 2.9652 20 74 54 4.694520135 25.56120449
## X113 25.0 25.68421 2.9652 18 34 16 0.149096235 -0.49055044
## X114 28.0 27.74156 4.4478 18 105 87 5.619948715 79.16111739
## X115 24.0 24.00000 0.0000 24 24 0 NA NA
## X116 21.0 20.39052 2.9652 10 62 52 2.083910344 19.35313512
## X117 19.0 19.16309 4.4478 10 27 17 -0.273043771 -0.61598240
## X118 20.0 19.21152 4.4478 10 32 22 -0.003126172 -0.38650407
## X119 21.5 21.50000 2.2239 20 23 3 0.000000000 -2.75000000
## X120 19.0 19.00000 0.0000 19 19 0 NaN NaN
## X121 22.0 22.06298 4.4478 14 74 60 3.634903445 32.40787061
## X122 20.0 20.21040 2.9652 13 30 17 0.335301371 -0.53209471
## X123 17.0 17.62951 2.9652 10 29 19 0.184168338 -0.15364200
## X124 19.0 19.39278 2.9652 11 54 43 3.280254686 29.54041933
## X125 18.0 17.82555 1.4826 10 27 17 -0.078159454 0.89907800
## X126 15.5 15.50000 2.2239 14 17 3 0.000000000 -2.43750000
## X127 22.0 21.82258 2.9652 16 30 14 0.253837131 -0.15761576
## X128 21.0 21.06569 2.9652 14 31 17 0.413515982 -0.03288392
## X129 26.0 26.70976 4.4478 9 109 100 2.490866017 25.20129102
## X130 23.0 23.39704 4.4478 10 93 83 2.967720115 19.66164007
## X131 17.0 17.49507 2.9652 10 26 16 0.195127720 -0.24248962
## X132 17.0 17.00000 4.4478 14 20 6 0.000000000 -2.75000000
## X133 17.0 16.95690 1.4826 10 25 15 0.076814340 1.77799441
## X134 16.0 16.45852 1.4826 10 22 12 -0.152558046 2.46865747
## se
## X11 0.07806786
## X12 0.14963536
## X13 0.11184947
## X14 0.09545191
## X15 0.15927064
## X16 0.20322164
## X17 0.12057266
## X18 0.09377102
## X19 0.12592327
## X110 0.17063832
## X111 0.14961057
## X112 0.54318254
## X113 0.22321789
## X114 0.14158366
## X115 NA
## X116 0.20304225
## X117 0.21549035
## X118 0.09609507
## X119 1.50000000
## X120 0.00000000
## X121 0.11454641
## X122 0.07147819
## X123 0.06026059
## X124 0.10814980
## X125 0.07635611
## X126 0.86602540
## X127 0.33704763
## X128 0.25895586
## X129 0.09553612
## X130 0.19348356
## X131 0.09109735
## X132 3.00000000
## X133 0.09324330
## X134 0.10766835
# Histogram of the `hwy` column.
# Columns are referenced by bare name inside aes() so they are looked up in
# the `data` argument; writing vehiclesDF$hwy there bypasses that lookup,
# triggers ggplot2's "use of `$` is discouraged" warning and breaks if the
# plot is later faceted or given different data.
# bins = 30 is ggplot2's default bin count, stated explicitly so the plot is
# unchanged but the "`stat_bin()` using `bins = 30`" message is not emitted.
ggplot(vehiclesDF, aes(x = hwy)) +
  geom_histogram(bins = 30)

# Histogram of the `cty` column, built the same way.
ggplot(vehiclesDF, aes(x = cty)) +
  geom_histogram(bins = 30)