The data are examined using the commands “str” (shows the structure of the data), “head” (lists the first six rows of the dataset), and “describe” from the “psych” package (gives fairly detailed summary statistics for the continuous variables).
# {r mpg, warning = FALSE}
# Setup ----

# Put the Rtools 4.0 toolchain on PATH (Windows only). The entry is added to
# ~/.Renviron only when it is not already there, and existing lines are kept:
# the previous unconditional writeLines() call replaced the whole file on
# every run, discarding any other settings stored in it.
rtools_entry <- 'PATH="${RTOOLS40_HOME}\\usr\\bin;${PATH}"'
renviron_path <- path.expand("~/.Renviron")
if (.Platform$OS.type == "windows") {
  renviron_lines <- if (file.exists(renviron_path)) {
    readLines(renviron_path, warn = FALSE)
  } else {
    character(0)
  }
  if (!rtools_entry %in% renviron_lines) {
    writeLines(c(renviron_lines, rtools_entry), con = renviron_path)
  }
}
Sys.which("make")
## make
## "C:\\rtools40\\usr\\bin\\make.exe"
## "C:\\rtools40\\usr\\bin\\make.exe"

# Choose a CRAN mirror BEFORE installing anything. While the "repos" option
# still held its placeholder value, install.packages("psych") could not read
# the package index and reported the package as not available.
# To make the mirror permanent, put this call into ~/.Rprofile
# (e.g. via file.edit(file.path("~", ".Rprofile")) in an interactive session).
local({
  repos <- getOption("repos")
  repos["CRAN"] <- "https://cran.r-project.org"
  options(repos = repos)
})

# Install only the packages that are missing, so re-running the script does
# not download and reinstall them every time.
# NOTE(review): installr is not used in the visible part of this script;
# drop it here and below if nothing later depends on it.
required_pkgs <- c("installr", "tidyverse", "psych")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

library(installr)
##
## Welcome to installr version 0.22.0
##
## More information is available on the installr project website:
## https://github.com/talgalili/installr/
##
## Contact: <tal.galili@gmail.com>
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/installr/issues
##
## To suppress this message use:
## suppressPackageStartupMessages(library(installr))
library(tidyverse)
library(psych) # used for the "describe" command below
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Compact overview of the data set: one line per column showing its type
# and the first few values.
utils::str(mpg)
## tibble [234 x 11] (S3: tbl_df/tbl/data.frame)
## $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
## $ model : chr [1:234] "a4" "a4" "a4" "a4" ...
## $ displ : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr [1:234] "f" "f" "f" "f" ...
## $ cty : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr [1:234] "p" "p" "p" "p" ...
## $ class : chr [1:234] "compact" "compact" "compact" "compact" ...
# First six observations (six is head()'s default, spelled out here).
head(mpg, n = 6L)
## # A tibble: 6 x 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compa~
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compa~
## 3 audi a4 2 2008 4 manual(m6) f 20 31 p compa~
## 4 audi a4 2 2008 4 auto(av) f 21 30 p compa~
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compa~
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compa~
# Summary statistics for the numeric columns only.
# Passing the whole tibble also fed the character columns (manufacturer,
# model, trans, drv, fl, class) to describe(), which produced a string of
# "NAs introduced by coercion" / "no non-missing arguments to min/max"
# warnings and NaN / Inf rows that carry no information.
numeric_cols <- vapply(mpg, is.numeric, logical(1))
describe(mpg[numeric_cols])
## vars n mean sd median trimmed mad min max range skew kurtosis
## displ 1 234 3.47 1.29 3.3 3.39 1.33 1.6 7 5.4 0.44 -0.91
## year 2 234 2003.50 4.51 2003.5 2003.50 6.67 1999.0 2008 9.0 0.00 -2.01
## cyl 3 234 5.89 1.61 6.0 5.86 2.97 4.0 8 4.0 0.11 -1.46
## cty 4 234 16.86 4.26 17.0 16.61 4.45 9.0 35 26.0 0.79 1.43
## hwy 5 234 23.44 5.95 24.0 23.23 7.41 12.0 44 32.0 0.36 0.14
## se
## displ 0.08
## year 0.29
## cyl 0.11
## cty 0.28
## hwy 0.39