Question 1

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Compact overview of mtcars: one line per column with its storage type and
# the first few values.
dplyr::glimpse(mtcars)
## Observations: 32
## Variables: 11
## $ mpg  <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19....
## $ cyl  <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, ...
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 1...
## $ hp   <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, ...
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.9...
## $ wt   <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3...
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 2...
## $ vs   <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ am   <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, ...
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, ...
#Variables in mtcars (data type).

#Character data type = names of cars (stored as the row names of mtcars, not as one of its 11 columns).

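#Integers data type (whole-number values; note that R stores these columns as <dbl>, see the glimpse() output above) = carb = Number of carburetors, gear = Number of forward gears, cyl = Number of cylinders, am = Transmission (0 = automatic, 1 = manual), vs = Engine type.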

#Numeric = mpg = Miles/(US) gallon, qsec = 1/4 mile time, wt = Weight (1000 lbs), hp = Gross horsepower, drat = Rear axle ratio, disp = Displacement (cu.in.).

Question 2

#Classification of each variable in mtcars.

# Discrete Variables= Number of cylinders, Number of gears, Number of carburetors, Transmission, Engine type

# Continuous variables = qsec = 1/4 mile time, wt = Weight (1000 lbs), hp = Gross horsepower, mpg = Miles/(US) gallon, disp = Displacement (cu.in.), drat = Rear axle ratio.

Question 3

# Six-number summary (min, quartiles, median, mean, max) of cylinder count.
cyl_summary <- summary(mtcars[["cyl"]])
print(cyl_summary)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.000   4.000   6.000   6.188   8.000   8.000
# Same summary for gross horsepower.
hp_summary <- summary(mtcars[["hp"]])
print(hp_summary)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    52.0    96.5   123.0   146.7   180.0   335.0
# Same summary for the number of carburetors.
carb_summary <- summary(mtcars[["carb"]])
print(carb_summary)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   2.000   2.812   4.000   8.000

Question 4

library(datasets)
# Load the esoph case-control data set and print a per-column summary:
# level counts for the three grouping factors, quantiles for the two counts.
data("esoph", package = "datasets")
print(summary(esoph))
##    agegp          alcgp         tobgp        ncases         ncontrols    
##  25-34:15   0-39g/day:23   0-9g/day:24   Min.   : 0.000   Min.   : 1.00  
##  35-44:15   40-79    :23   10-19   :24   1st Qu.: 0.000   1st Qu.: 3.00  
##  45-54:16   80-119   :21   20-29   :20   Median : 1.000   Median : 6.00  
##  55-64:16   120+     :21   30+     :20   Mean   : 2.273   Mean   :11.08  
##  65-74:15                                3rd Qu.: 4.000   3rd Qu.:14.00  
##  75+  :11                                Max.   :17.000   Max.   :60.00
#esoph
#agegp= age grouping
#alcgp= alcohol consumption
#tobgp= tobacco consumption

# Level counts for each of the three grouping factors in esoph.
# summary() on a factor returns a named integer vector of counts per level.
# Each result is named <column>_summary after the column it describes.

# Summary of age group
agegp_summary <- summary(esoph$agegp)
agegp_summary
## 25-34 35-44 45-54 55-64 65-74   75+ 
##    15    15    16    16    15    11
# Summary of alcohol consumption
alcgp_summary <- summary(esoph$alcgp)
alcgp_summary
## 0-39g/day     40-79    80-119      120+ 
##        23        23        21        21
# Summary of tobacco consumption
tobgp_summary <- summary(esoph$tobgp)
tobgp_summary
## 0-9g/day    10-19    20-29      30+ 
##       24       24       20       20

Question 5

# Age group (agegp): absolute counts, then each count as a share of all rows.
freqagegp <- table(esoph[["agegp"]])
print(freqagegp)
## 
## 25-34 35-44 45-54 55-64 65-74   75+ 
##    15    15    16    16    15    11
relfreqagegp <- freqagegp / nrow(esoph)
rnd_relfreqagegp <- round(relfreqagegp, 2)
print(rnd_relfreqagegp)
## 
## 25-34 35-44 45-54 55-64 65-74   75+ 
##  0.17  0.17  0.18  0.18  0.17  0.12
# Counts and rounded shares side by side, one row per age group.
print(cbind(freqagegp = freqagegp, rnd_relfreqagegp = rnd_relfreqagegp))
##       freqagegp rnd_relfreqagegp
## 25-34        15             0.17
## 35-44        15             0.17
## 45-54        16             0.18
## 55-64        16             0.18
## 65-74        15             0.17
## 75+          11             0.12
# Alcohol consumption (alcgp): absolute counts, then shares of all rows.
freqalcgp <- table(esoph[["alcgp"]])
print(freqalcgp)
## 
## 0-39g/day     40-79    80-119      120+ 
##        23        23        21        21
relfreqalcgp <- freqalcgp / nrow(esoph)
rnd_relfreqalcgp <- round(relfreqalcgp, 2)
print(rnd_relfreqalcgp)
## 
## 0-39g/day     40-79    80-119      120+ 
##      0.26      0.26      0.24      0.24
# Counts and rounded shares side by side, one row per alcohol group.
print(cbind(freqalcgp = freqalcgp, rnd_relfreqalcgp = rnd_relfreqalcgp))
##           freqalcgp rnd_relfreqalcgp
## 0-39g/day        23             0.26
## 40-79            23             0.26
## 80-119           21             0.24
## 120+             21             0.24
# Tobacco consumption (tobgp): absolute counts, then shares of all rows.
freqtobgp <- table(esoph[["tobgp"]])
print(freqtobgp)
## 
## 0-9g/day    10-19    20-29      30+ 
##       24       24       20       20
relfreqtobgp <- freqtobgp / nrow(esoph)
rnd_relfreqtobgp <- round(relfreqtobgp, 2)
print(rnd_relfreqtobgp)
## 
## 0-9g/day    10-19    20-29      30+ 
##     0.27     0.27     0.23     0.23
# Counts and rounded shares side by side, one row per tobacco group.
print(cbind(freqtobgp = freqtobgp, rnd_relfreqtobgp = rnd_relfreqtobgp))
##          freqtobgp rnd_relfreqtobgp
## 0-9g/day        24             0.27
## 10-19           24             0.27
## 20-29           20             0.23
## 30+             20             0.23

Question 6

# Two-way contingency table: age group (rows) by alcohol consumption (columns).
x <- table(esoph[["agegp"]], esoph[["alcgp"]])
print(ftable(x))
##        0-39g/day 40-79 80-119 120+
##                                   
## 25-34          4     4      3    4
## 35-44          4     4      4    3
## 45-54          4     4      4    4
## 55-64          4     4      4    4
## 65-74          4     3      4    4
## 75+            3     4      2    2
# Two-way contingency table: alcohol consumption (rows) by tobacco
# consumption (columns).
y <- table(esoph[["alcgp"]], esoph[["tobgp"]])
print(ftable(y))
##            0-9g/day 10-19 20-29 30+
##                                    
## 0-39g/day         6     6     5   6
## 40-79             6     6     6   5
## 80-119            6     6     4   5
## 120+              6     6     5   4

Question 7(A)

library(ggplot2)
# Preview of the diamonds data set (first rows and column types).
print(diamonds)
## # A tibble: 53,940 x 10
##    carat       cut color clarity depth table price     x     y     z
##    <dbl>     <ord> <ord>   <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
##  1  0.23     Ideal     E     SI2  61.5    55   326  3.95  3.98  2.43
##  2  0.21   Premium     E     SI1  59.8    61   326  3.89  3.84  2.31
##  3  0.23      Good     E     VS1  56.9    65   327  4.05  4.07  2.31
##  4  0.29   Premium     I     VS2  62.4    58   334  4.20  4.23  2.63
##  5  0.31      Good     J     SI2  63.3    58   335  4.34  4.35  2.75
##  6  0.24 Very Good     J    VVS2  62.8    57   336  3.94  3.96  2.48
##  7  0.24 Very Good     I    VVS1  62.3    57   336  3.95  3.98  2.47
##  8  0.26 Very Good     H     SI1  61.9    55   337  4.07  4.11  2.53
##  9  0.22      Fair     E     VS2  65.1    61   337  3.87  3.78  2.49
## 10  0.23 Very Good     H     VS1  59.4    61   338  4.00  4.05  2.39
## # ... with 53,930 more rows
# Minimum and maximum of price, carat, depth and table, in that order.
range(diamonds[["price"]])
## [1]   326 18823
range(diamonds[["carat"]])
## [1] 0.20 5.01
range(diamonds[["depth"]])
## [1] 43 79
range(diamonds[["table"]])
## [1] 43 95

Question 7(B)

library(dplyr)
# Show price and carat side by side as a two-column tibble.
# select() is used instead of wrapping cbind() in tbl_df(): cbind() on two
# vectors builds an unnamed numeric matrix, so the columns would print as
# V1/V2 and the integer price would be coerced to double, and tbl_df() is
# deprecated in dplyr. select() keeps the original column names and types.
select(diamonds, price, carat)
## # A tibble: 53,940 x 2
##    price carat
##    <int> <dbl>
##  1   326  0.23
##  2   326  0.21
##  3   327  0.23
##  4   334  0.29
##  5   335  0.31
##  6   336  0.24
##  7   336  0.24
##  8   337  0.26
##  9   337  0.22
## 10   338  0.23
## # ... with 53,930 more rows

Question 7(C)

# Histograms of the four numeric diamonds columns whose ranges were computed
# in 7(A): price, carat, depth and table, one plot per call.
# NOTE(review): base hist() builds its default title and x-axis label from the
# argument expression as written, so the `diamonds$<column>` form is kept
# as-is to leave the rendered plots unchanged.
hist(diamonds$price)

hist(diamonds$carat)

hist(diamonds$depth)

hist(diamonds$table)