#Question 1: Using R code, identify the data type of each variable in mtcars, a dataset built into R as long as you have the package, datasets, installed.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(datasets)
# Answer to Question 1: list every mtcars column with its storage type and
# first few values.
dplyr::glimpse(mtcars)
## Observations: 32
## Variables: 11
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19....
## $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, ...
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 1...
## $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, ...
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.9...
## $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3...
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 2...
## $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, ...
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, ...
# Print the whole data frame; the car names in the left margin are row names,
# not one of the 11 variables.
mtcars
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
#character data: names of cars (stored as row names, not as a column of the data frame)
#numeric data: mpg (Miles/(US) gallon), disp (Displacement (cu.in.)), hp (Gross horsepower), drat (Rear axle ratio), wt (Weight (1000 lbs)), qsec (1/4 mile time)
#integer-valued data (stored as double, shown as <dbl> by glimpse): cyl (Number of cylinders), vs (Engine shape (0 = V-shaped, 1 = straight)), gear (Number of forward gears), am (Transmission (0 = automatic, 1 = manual)), carb (Number of carburetors)
#source for dataset information: https://www.rdocumentation.org/packages/datasets/versions/3.4.1/topics/mtcars
# Character Data: names of cars (stored as row names, not as a column of the data frame)
# Numeric Data: mpg (Miles/(US) gallon), disp (Displacement (cu.in.)), hp (Gross horsepower), drat (Rear axle ratio), wt (Weight (1000 lbs)), qsec (1/4 mile time)
# Integer-valued Data (stored as double, shown as <dbl> by glimpse): cyl (Number of cylinders), vs (Engine shape (0 = V-shaped, 1 = straight)), gear (Number of forward gears), am (Transmission (0 = automatic, 1 = manual)), carb (Number of carburetors)
# CONTINUOUS: mpg, disp, hp, drat, wt, qsec
# DISCRETE: cyl, vs, am, gear, carb
# Six-number summaries (min, quartiles, median, mean, max) for three columns.

# Variable 1: mpg
mpg_summary <- summary(mtcars[["mpg"]])
mpg_summary
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.40 15.42 19.20 20.09 22.80 33.90

# Variable 2: disp
disp_summary <- summary(mtcars[["disp"]])
disp_summary
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 71.1 120.8 196.3 230.7 326.0 472.0

# Variable 3: vs -- coded 0/1, so the mean is the share of cars with vs = 1
vs_summary <- summary(mtcars[["vs"]])
vs_summary
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.4375 1.0000 1.0000
# Load the esoph data set into the workspace, then summarise every column:
# level counts for the three factors, quantiles and mean for the two counts.
data("esoph")
summary(esoph)
## agegp alcgp tobgp ncases ncontrols
## 25-34:15 0-39g/day:23 0-9g/day:24 Min. : 0.000 Min. : 1.00
## 35-44:15 40-79 :23 10-19 :24 1st Qu.: 0.000 1st Qu.: 3.00
## 45-54:16 80-119 :21 20-29 :20 Median : 1.000 Median : 6.00
## 55-64:16 120+ :21 30+ :20 Mean : 2.273 Mean :11.08
## 65-74:15 3rd Qu.: 4.000 3rd Qu.:14.00
## 75+ :11 Max. :17.000 Max. :60.00
# Print every row of esoph to inspect the raw records.
esoph
## agegp alcgp tobgp ncases ncontrols
## 1 25-34 0-39g/day 0-9g/day 0 40
## 2 25-34 0-39g/day 10-19 0 10
## 3 25-34 0-39g/day 20-29 0 6
## 4 25-34 0-39g/day 30+ 0 5
## 5 25-34 40-79 0-9g/day 0 27
## 6 25-34 40-79 10-19 0 7
## 7 25-34 40-79 20-29 0 4
## 8 25-34 40-79 30+ 0 7
## 9 25-34 80-119 0-9g/day 0 2
## 10 25-34 80-119 10-19 0 1
## 11 25-34 80-119 30+ 0 2
## 12 25-34 120+ 0-9g/day 0 1
## 13 25-34 120+ 10-19 1 1
## 14 25-34 120+ 20-29 0 1
## 15 25-34 120+ 30+ 0 2
## 16 35-44 0-39g/day 0-9g/day 0 60
## 17 35-44 0-39g/day 10-19 1 14
## 18 35-44 0-39g/day 20-29 0 7
## 19 35-44 0-39g/day 30+ 0 8
## 20 35-44 40-79 0-9g/day 0 35
## 21 35-44 40-79 10-19 3 23
## 22 35-44 40-79 20-29 1 14
## 23 35-44 40-79 30+ 0 8
## 24 35-44 80-119 0-9g/day 0 11
## 25 35-44 80-119 10-19 0 6
## 26 35-44 80-119 20-29 0 2
## 27 35-44 80-119 30+ 0 1
## 28 35-44 120+ 0-9g/day 2 3
## 29 35-44 120+ 10-19 0 3
## 30 35-44 120+ 20-29 2 4
## 31 45-54 0-39g/day 0-9g/day 1 46
## 32 45-54 0-39g/day 10-19 0 18
## 33 45-54 0-39g/day 20-29 0 10
## 34 45-54 0-39g/day 30+ 0 4
## 35 45-54 40-79 0-9g/day 6 38
## 36 45-54 40-79 10-19 4 21
## 37 45-54 40-79 20-29 5 15
## 38 45-54 40-79 30+ 5 7
## 39 45-54 80-119 0-9g/day 3 16
## 40 45-54 80-119 10-19 6 14
## 41 45-54 80-119 20-29 1 5
## 42 45-54 80-119 30+ 2 4
## 43 45-54 120+ 0-9g/day 4 4
## 44 45-54 120+ 10-19 3 4
## 45 45-54 120+ 20-29 2 3
## 46 45-54 120+ 30+ 4 4
## 47 55-64 0-39g/day 0-9g/day 2 49
## 48 55-64 0-39g/day 10-19 3 22
## 49 55-64 0-39g/day 20-29 3 12
## 50 55-64 0-39g/day 30+ 4 6
## 51 55-64 40-79 0-9g/day 9 40
## 52 55-64 40-79 10-19 6 21
## 53 55-64 40-79 20-29 4 17
## 54 55-64 40-79 30+ 3 6
## 55 55-64 80-119 0-9g/day 9 18
## 56 55-64 80-119 10-19 8 15
## 57 55-64 80-119 20-29 3 6
## 58 55-64 80-119 30+ 4 4
## 59 55-64 120+ 0-9g/day 5 10
## 60 55-64 120+ 10-19 6 7
## 61 55-64 120+ 20-29 2 3
## 62 55-64 120+ 30+ 5 6
## 63 65-74 0-39g/day 0-9g/day 5 48
## 64 65-74 0-39g/day 10-19 4 14
## 65 65-74 0-39g/day 20-29 2 7
## 66 65-74 0-39g/day 30+ 0 2
## 67 65-74 40-79 0-9g/day 17 34
## 68 65-74 40-79 10-19 3 10
## 69 65-74 40-79 20-29 5 9
## 70 65-74 80-119 0-9g/day 6 13
## 71 65-74 80-119 10-19 4 12
## 72 65-74 80-119 20-29 2 3
## 73 65-74 80-119 30+ 1 1
## 74 65-74 120+ 0-9g/day 3 4
## 75 65-74 120+ 10-19 1 2
## 76 65-74 120+ 20-29 1 1
## 77 65-74 120+ 30+ 1 1
## 78 75+ 0-39g/day 0-9g/day 1 18
## 79 75+ 0-39g/day 10-19 2 6
## 80 75+ 0-39g/day 30+ 1 3
## 81 75+ 40-79 0-9g/day 2 5
## 82 75+ 40-79 10-19 1 3
## 83 75+ 40-79 20-29 0 3
## 84 75+ 40-79 30+ 1 1
## 85 75+ 80-119 0-9g/day 1 1
## 86 75+ 80-119 10-19 1 1
## 87 75+ 120+ 0-9g/day 2 2
## 88 75+ 120+ 10-19 1 1
#esoph
#agegp= age grouping - ordered, factor
#alcgp= alcohol consumption - ordered, factor
#tobgp= tobacco consumption - ordered, factor
# summary() on a factor returns the number of rows in each level.

# Age group
agegp_summary <- summary(esoph[["agegp"]])
agegp_summary
## 25-34 35-44 45-54 55-64 65-74 75+
## 15 15 16 16 15 11

# Alcohol consumption
alcgp_summary <- summary(esoph[["alcgp"]])
alcgp_summary
## 0-39g/day 40-79 80-119 120+
## 23 23 21 21

# Tobacco consumption
tobgp_summary <- summary(esoph[["tobgp"]])
tobgp_summary
## 0-9g/day 10-19 20-29 30+
## 24 24 20 20
# agegp = age group (ordered factor)
# alcgp = alcohol consumption (ordered factor)
# tobgp = tobacco consumption (ordered factor)
# Frequencies: number of esoph rows falling in each level of a factor.
freqagegp <- table(esoph[["agegp"]])
freqagegp
##
## 25-34 35-44 45-54 55-64 65-74 75+
## 15 15 16 16 15 11
freqalcgp <- table(esoph[["alcgp"]])
freqalcgp
##
## 0-39g/day 40-79 80-119 120+
## 23 23 21 21
freqtobgp <- table(esoph[["tobgp"]])
freqtobgp
##
## 0-9g/day 10-19 20-29 30+
## 24 24 20 20

# Relative frequencies: each count divided by the total number of rows,
# then rounded to two decimal places for display.
relfreqagegp <- freqagegp / nrow(esoph)
rnd_relfreqagegp <- round(relfreqagegp, 2)
rnd_relfreqagegp
##
## 25-34 35-44 45-54 55-64 65-74 75+
## 0.17 0.17 0.18 0.18 0.17 0.12
relfreqalcgp <- freqalcgp / nrow(esoph)
rnd_relfreqalcgp <- round(relfreqalcgp, 2)
rnd_relfreqalcgp
##
## 0-39g/day 40-79 80-119 120+
## 0.26 0.26 0.24 0.24
relfreqtobcgp <- freqtobgp / nrow(esoph)
rnd_relfreqtobcgp <- round(relfreqtobcgp, 2)
rnd_relfreqtobcgp
##
## 0-9g/day 10-19 20-29 30+
## 0.27 0.27 0.23 0.23
# Joint frequencies: rows of esoph cross-classified by two factors at a time.

# Age group (rows) by alcohol consumption (columns)
tablejf1 <- table(esoph[["agegp"]], esoph[["alcgp"]])
tablejf1
##
## 0-39g/day 40-79 80-119 120+
## 25-34 4 4 3 4
## 35-44 4 4 4 3
## 45-54 4 4 4 4
## 55-64 4 4 4 4
## 65-74 4 3 4 4
## 75+ 3 4 2 2

# Alcohol consumption (rows) by tobacco consumption (columns)
tablejf2 <- table(esoph[["alcgp"]], esoph[["tobgp"]])
tablejf2
##
## 0-9g/day 10-19 20-29 30+
## 0-39g/day 6 6 5 6
## 40-79 6 6 6 5
## 80-119 6 6 4 5
## 120+ 6 6 5 4
# The same joint frequencies built with the formula interface of xtabs().
# Columns are looked up by their full names through `data = esoph`; writing
# `esoph$alcg` would rely on `$` partial matching to find `alcgp`.

# Age group (rows) by alcohol consumption (columns)
xtabjf1 <- xtabs(~ agegp + alcgp, data = esoph)
xtabjf1
## alcgp
## agegp 0-39g/day 40-79 80-119 120+
## 25-34 4 4 3 4
## 35-44 4 4 4 3
## 45-54 4 4 4 4
## 55-64 4 4 4 4
## 65-74 4 3 4 4
## 75+ 3 4 2 2

# Alcohol consumption (rows) by tobacco consumption (columns), the xtabs()
# counterpart of table(esoph$alcgp, esoph$tobgp).
xtabjf2 <- xtabs(~ alcgp + tobgp, data = esoph)
xtabjf2
## tobgp
## alcgp 0-9g/day 10-19 20-29 30+
## 0-39g/day 6 6 5 6
## 40-79 6 6 6 5
## 80-119 6 6 4 5
## 120+ 6 6 5 4
# Print the diamonds tibble (loaded with ggplot2); tibbles show only the
# first rows plus each column's type.
diamonds
## # A tibble: 53,940 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## 7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
## 8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49
## 10 0.23 Very Good H VS1 59.4 61 338 4.00 4.05 2.39
## # ... with 53,930 more rows
# Per-column summary: quantiles and mean for the numeric columns, level
# counts for the ordered factors cut, color and clarity.
summary(diamonds)
## carat cut color clarity
## Min. :0.2000 Fair : 1610 D: 6775 SI1 :13065
## 1st Qu.:0.4000 Good : 4906 E: 9797 VS2 :12258
## Median :0.7000 Very Good:12082 F: 9542 SI2 : 9194
## Mean :0.7979 Premium :13791 G:11292 VS1 : 8171
## 3rd Qu.:1.0400 Ideal :21551 H: 8304 VVS2 : 5066
## Max. :5.0100 I: 5422 VVS1 : 3655
## J: 2808 (Other): 2531
## depth table price x
## Min. :43.00 Min. :43.00 Min. : 326 Min. : 0.000
## 1st Qu.:61.00 1st Qu.:56.00 1st Qu.: 950 1st Qu.: 4.710
## Median :61.80 Median :57.00 Median : 2401 Median : 5.700
## Mean :61.75 Mean :57.46 Mean : 3933 Mean : 5.731
## 3rd Qu.:62.50 3rd Qu.:59.00 3rd Qu.: 5324 3rd Qu.: 6.540
## Max. :79.00 Max. :95.00 Max. :18823 Max. :10.740
##
## y z
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 4.720 1st Qu.: 2.910
## Median : 5.710 Median : 3.530
## Mean : 5.735 Mean : 3.539
## 3rd Qu.: 6.540 3rd Qu.: 4.040
## Max. :58.900 Max. :31.800
##
# Minimum and maximum of four numeric diamonds columns, each pulled out
# into its own vector first.
price <- diamonds[["price"]]
range(price)
## [1] 326 18823

carat <- diamonds[["carat"]]
range(carat)
## [1] 0.20 5.01

depth <- diamonds[["depth"]]
range(depth)
## [1] 43 79

# Note: this vector shares its name with the base function table().
table <- diamonds[["table"]]
range(table)
## [1] 43 95
# Pair each diamond's price with its depth in a two-column tibble.
# cbind() builds a numeric matrix, so both columns are stored as double.
# The columns are named V1/V2 explicitly so the result keeps those names
# without relying on automatic naming of an unnamed matrix.
# as_tibble() is used in place of tbl_df(), which dplyr has deprecated.
grpfrq <- as_tibble(cbind(V1 = diamonds$price, V2 = diamonds$depth))
grpfrq
## # A tibble: 53,940 x 2
## V1 V2
## <dbl> <dbl>
## 1 326 61.5
## 2 326 59.8
## 3 327 56.9
## 4 334 62.4
## 5 335 63.3
## 6 336 62.8
## 7 336 62.3
## 8 337 61.9
## 9 337 65.1
## 10 338 59.4
## # ... with 53,930 more rows
# Histogram of price with hist()'s default breaks. The call draws the plot
# and returns a "histogram" object; printing it lists the bin breaks, counts,
# densities and midpoints.
# The argument is written as diamonds$price because hist() records that
# expression in $xname.
pricehisto <- hist(diamonds$price)
pricehisto
## $breaks
## [1] 0 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000
## [12] 11000 12000 13000 14000 15000 16000 17000 18000 19000
##
## $counts
## [1] 14524 9683 6129 4225 4665 3163 2278 1668 1307 1076 934
## [12] 825 701 603 504 513 425 405 312
##
## $density
## [1] 2.692621e-04 1.795143e-04 1.136263e-04 7.832777e-05 8.648498e-05
## [6] 5.863923e-05 4.223211e-05 3.092325e-05 2.423063e-05 1.994809e-05
## [11] 1.731554e-05 1.529477e-05 1.299592e-05 1.117909e-05 9.343715e-06
## [16] 9.510567e-06 7.879125e-06 7.508343e-06 5.784205e-06
##
## $mids
## [1] 500 1500 2500 3500 4500 5500 6500 7500 8500 9500 10500
## [12] 11500 12500 13500 14500 15500 16500 17500 18500
##
## $xname
## [1] "diamonds$price"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# Histogram of carat with default breaks; the returned "histogram" object is
# printed to show the grouped frequencies (breaks, counts, density, mids).
# The argument expression diamonds$carat is what hist() stores in $xname.
carathisto <- hist(diamonds$carat)
carathisto
## $breaks
## [1] 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5
##
## $counts
## [1] 18932 17506 12060 3553 1763 94 23 4 4 0 1
##
## $density
## [1] 7.019651e-01 6.490916e-01 4.471635e-01 1.317390e-01 6.536893e-02
## [6] 3.485354e-03 8.527994e-04 1.483129e-04 1.483129e-04 0.000000e+00
## [11] 3.707824e-05
##
## $mids
## [1] 0.25 0.75 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75 5.25
##
## $xname
## [1] "diamonds$carat"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# Histogram of depth with default breaks; the returned "histogram" object is
# printed to show the grouped frequencies (breaks, counts, density, mids).
# The argument expression diamonds$depth is what hist() stores in $xname.
depthhisto <- hist(diamonds$depth)
depthhisto
## $breaks
## [1] 42 44 46 48 50 52 54 56 58 60 62 64 66 68 70 72 74 76 78 80
##
## $counts
## [1] 3 0 0 0 2 11 53 603 4953 25885 20560
## [12] 1504 278 65 17 3 0 0 3
##
## $density
## [1] 2.780868e-05 0.000000e+00 0.000000e+00 0.000000e+00 1.853912e-05
## [6] 1.019651e-04 4.912866e-04 5.589544e-03 4.591212e-02 2.399425e-01
## [11] 1.905821e-01 1.394142e-02 2.576937e-03 6.025213e-04 1.575825e-04
## [16] 2.780868e-05 0.000000e+00 0.000000e+00 2.780868e-05
##
## $mids
## [1] 43 45 47 49 51 53 55 57 59 61 63 65 67 69 71 73 75 77 79
##
## $xname
## [1] "diamonds$depth"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# Histogram of the table column with default breaks; the returned "histogram"
# object is printed to show the grouped frequencies (breaks, counts, density,
# mids). The argument expression diamonds$table is what hist() stores in
# $xname.
tablehisto <- hist(diamonds$table)
tablehisto
## $breaks
## [1] 40 45 50 55 60 65 70 75 80 85 90 95
##
## $counts
## [1] 2 4 9837 39251 4665 173 5 2 0 0 1
##
## $density
## [1] 7.415647e-06 1.483129e-05 3.647386e-02 1.455358e-01 1.729700e-02
## [6] 6.414535e-04 1.853912e-05 7.415647e-06 0.000000e+00 0.000000e+00
## [11] 3.707824e-06
##
## $mids
## [1] 42.5 47.5 52.5 57.5 62.5 67.5 72.5 77.5 82.5 87.5 92.5
##
## $xname
## [1] "diamonds$table"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"