Question 1
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Compact overview of mtcars: one line per column showing its type and first values.
glimpse(mtcars)
## Observations: 32
## Variables: 11
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19....
## $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, ...
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 1...
## $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, ...
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.9...
## $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3...
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 2...
## $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, ...
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, ...
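# Variables in mtcars (data type).
# Note: glimpse() reports every column as <dbl>, so the integer/numeric split below describes the values each variable takes, not R's storage type.
# Character data type = names of cars (row names).
# Integer data type = Number of carburetors (carb), Number of forward gears (gear), Number of cylinders (cyl), Transmission (am: 0 = automatic, 1 = manual), Engine type (vs).
# Numeric = mpg = Miles/(US) gallon, qsec = 1/4 mile time, wt = Weight (1000 lbs), hp = Gross horsepower, drat = Rear axle ratio, disp = Displacement (cu.in.).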
Question 2
# Classification of each variable in mtcars.
# Discrete variables = Number of cylinders, Number of gears, Number of carburetors, Transmission, Engine type.
# Continuous variables = qsec = 1/4 mile time, wt = Weight (1000 lbs), hp = Gross horsepower, mpg = Miles/(US) gallon, disp = Displacement (cu.in.), drat = Rear axle ratio.
Question 3
# Six-number summary (min, quartiles, median, mean, max) of the cylinder count
cyl_summary <- summary(mtcars[["cyl"]])
print(cyl_summary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.000 4.000 6.000 6.188 8.000 8.000
# Same summary for gross horsepower
hp_summary <- summary(mtcars[["hp"]])
print(hp_summary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 52.0 96.5 123.0 146.7 180.0 335.0
# Same summary for the number of carburetors
carb_summary <- summary(mtcars[["carb"]])
print(carb_summary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 2.000 2.812 4.000 8.000
Question 4
# esoph ships with the datasets package; data() loads it into the workspace.
library(datasets)
data(esoph)
# Per-column overview: level counts for the three factors, quartiles and mean for the two count columns.
summary(esoph)
## agegp alcgp tobgp ncases ncontrols
## 25-34:15 0-39g/day:23 0-9g/day:24 Min. : 0.000 Min. : 1.00
## 35-44:15 40-79 :23 10-19 :24 1st Qu.: 0.000 1st Qu.: 3.00
## 45-54:16 80-119 :21 20-29 :20 Median : 1.000 Median : 6.00
## 55-64:16 120+ :21 30+ :20 Mean : 2.273 Mean :11.08
## 65-74:15 3rd Qu.: 4.000 3rd Qu.:14.00
## 75+ :11 Max. :17.000 Max. :60.00
#esoph
#agegp= age grouping
#alcgp= alcohol consumption
#tobgp= tobacco consumption
# Summary of age: summary() on a factor returns the number of rows per level
agegp_summary <- summary(esoph$agegp)
agegp_summary
## 25-34 35-44 45-54 55-64 65-74 75+
## 15 15 16 16 15 11
# Summary of alcohol consumption
# (object was previously misspelled `alcqp_summary`; it now matches the
# `alcgp` column and the naming of the other two summaries)
alcgp_summary <- summary(esoph$alcgp)
alcgp_summary
## 0-39g/day 40-79 80-119 120+
## 23 23 21 21
# Summary of tobacco consumption
tobgp_summary <- summary(esoph$tobgp)
tobgp_summary
## 0-9g/day 10-19 20-29 30+
## 24 24 20 20
Question 5
# Frequency and relative frequency distribution of agegp.
# prop.table() divides each count by the table total, which equals
# nrow(esoph) here because every row falls into exactly one level.
freqagegp <- table(esoph[["agegp"]])
freqagegp
##
## 25-34 35-44 45-54 55-64 65-74 75+
## 15 15 16 16 15 11
relfreqagegp <- prop.table(freqagegp)
rnd_relfreqagegp <- round(relfreqagegp, 2)
rnd_relfreqagegp
##
## 25-34 35-44 45-54 55-64 65-74 75+
## 0.17 0.17 0.18 0.18 0.17 0.12
# Counts and rounded proportions side by side, one row per level
cbind(freqagegp, rnd_relfreqagegp)
## freqagegp rnd_relfreqagegp
## 25-34 15 0.17
## 35-44 15 0.17
## 45-54 16 0.18
## 55-64 16 0.18
## 65-74 15 0.17
## 75+ 11 0.12
# Frequency and relative frequency distribution of alcgp
freqalcgp <- table(esoph[["alcgp"]])
freqalcgp
##
## 0-39g/day 40-79 80-119 120+
## 23 23 21 21
relfreqalcgp <- prop.table(freqalcgp)
rnd_relfreqalcgp <- round(relfreqalcgp, 2)
rnd_relfreqalcgp
##
## 0-39g/day 40-79 80-119 120+
## 0.26 0.26 0.24 0.24
cbind(freqalcgp, rnd_relfreqalcgp)
## freqalcgp rnd_relfreqalcgp
## 0-39g/day 23 0.26
## 40-79 23 0.26
## 80-119 21 0.24
## 120+ 21 0.24
# Frequency and relative frequency distribution of tobgp
freqtobgp <- table(esoph[["tobgp"]])
freqtobgp
##
## 0-9g/day 10-19 20-29 30+
## 24 24 20 20
relfreqtobgp <- prop.table(freqtobgp)
rnd_relfreqtobgp <- round(relfreqtobgp, 2)
rnd_relfreqtobgp
##
## 0-9g/day 10-19 20-29 30+
## 0.27 0.27 0.23 0.23
cbind(freqtobgp, rnd_relfreqtobgp)
## freqtobgp rnd_relfreqtobgp
## 0-9g/day 24 0.27
## 10-19 24 0.27
## 20-29 20 0.23
## 30+ 20 0.23
Question 6
# Two-way contingency table: age group (rows) by alcohol consumption (columns)
x <- table(esoph[["agegp"]], esoph[["alcgp"]])
print(ftable(x))
## 0-39g/day 40-79 80-119 120+
##
## 25-34 4 4 3 4
## 35-44 4 4 4 3
## 45-54 4 4 4 4
## 55-64 4 4 4 4
## 65-74 4 3 4 4
## 75+ 3 4 2 2
# Two-way contingency table: alcohol consumption (rows) by tobacco consumption (columns)
y <- table(esoph[["alcgp"]], esoph[["tobgp"]])
print(ftable(y))
## 0-9g/day 10-19 20-29 30+
##
## 0-39g/day 6 6 5 6
## 40-79 6 6 6 5
## 80-119 6 6 4 5
## 120+ 6 6 5 4
Question 7(A)
library(ggplot2)
# Print the diamonds data set; it is a tibble, so only the first rows are shown.
diamonds
## # A tibble: 53,940 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## 7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
## 8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49
## 10 0.23 Very Good H VS1 59.4 61 338 4.00 4.05 2.39
## # ... with 53,930 more rows
# diamonds
# range() returns the minimum and maximum of each variable.
range(diamonds$price)
## [1] 326 18823
range(diamonds$carat)
## [1] 0.20 5.01
range(diamonds$depth)
## [1] 43 79
range(diamonds$table)
## [1] 43 95
Question 7(B)
library(dplyr)
# Show price and carat side by side.
# select() keeps the original column names and types. The earlier
# tbl_df(cbind(...)) call went through a matrix, which renamed the columns to
# V1/V2 and coerced the integer price to double; tbl_df() is also deprecated
# in dplyr.
select(diamonds, price, carat)
## # A tibble: 53,940 x 2
## price carat
## <int> <dbl>
## 1 326 0.23
## 2 326 0.21
## 3 327 0.23
## 4 334 0.29
## 5 335 0.31
## 6 336 0.24
## 7 336 0.24
## 8 337 0.26
## 9 337 0.22
## 10 338 0.23
## # ... with 53,930 more rows
Question 7(C)
# One histogram per variable. The calls are kept separate because hist() builds
# its default title and x-axis label from the argument expression as written.
hist(diamonds$price)
hist(diamonds$carat)
hist(diamonds$depth)
hist(diamonds$table)