1. Data type of each variable in mtcars

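All eleven variables (mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb) are stored as numeric, as the class() output below shows (note that cyl is reported as "numeric", not "integer"). cyl, vs, am, gear, and carb hold only whole-number values, but R does not store them as the integer type. The car names are the row names of the data frame (character strings), not a column, which is why they do not appear in names(mtcars).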

names(mtcars)
 [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
[11] "carb"
class(mtcars$mpg)
[1] "numeric"
class(mtcars$cyl)
[1] "numeric"
class(mtcars$disp)
[1] "numeric"
class(mtcars$hp)
[1] "numeric"
class(mtcars$drat)
[1] "numeric"

2. Classification of each variable in mtcars

mpg, disp, hp, drat, wt, and qsec are all continuous variables. cyl, vs, am, gear, and carb are all discrete variables.

3. Summary of three variables in mtcars

#Summary of mpg
mpg_summary <- summary(mtcars$mpg)
mpg_summary
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  10.40   15.43   19.20   20.09   22.80   33.90 
#Summary of disp
displacement_summary <- summary(mtcars$disp)
displacement_summary
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   71.1   120.8   196.3   230.7   326.0   472.0 
#Summary of wt
weight_summary <- summary(mtcars$wt)
weight_summary
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.513   2.581   3.325   3.217   3.610   5.424 

4. Identify the data type of the three variables agegp, alcgp, and tobgp in the dataset esoph.

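agegp, alcgp, and tobgp are all ordered factors, i.e. ordinal categorical variables whose levels are ordered ranges (for example, the age groups 25-34 through 75+); they are not continuous. ncases and ncontrols are numeric and represent discrete counts.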

class(esoph$agegp)
[1] "ordered" "factor" 
class(esoph$alcgp)
[1] "ordered" "factor" 
class(esoph$tobgp)
[1] "ordered" "factor" 
class(esoph$ncases)
[1] "numeric"
class(esoph$ncontrols)
[1] "numeric"

5. Frequency and relative frequency distributions of agegp, alcgp, and tobgp in esoph.

#Frequency and relative distribution of the variable agegp
frequency_agegp <- (table(esoph$agegp))
freq_dist_agegp <- frequency_agegp / nrow(esoph)
rnd_agegp<-round(freq_dist_agegp, digits = 2)
cbind(frequency_agegp,rnd_agegp)
      frequency_agegp rnd_agegp
25-34              15      0.17
35-44              15      0.17
45-54              16      0.18
55-64              16      0.18
65-74              15      0.17
75+                11      0.12
#Frequency and relative distribution of the variable alcgp
frequency_alcgp <- (table(esoph$alcgp))
freq_dist_alcgp <- frequency_alcgp / nrow((esoph))
rnd_alcgp<-round(freq_dist_alcgp, digits = 2)
cbind(frequency_alcgp, rnd_alcgp)
          frequency_alcgp rnd_alcgp
0-39g/day              23      0.26
40-79                  23      0.26
80-119                 21      0.24
120+                   21      0.24
#Frequency and relative distribution of the variable tobgp
frequency_tobgp <- (table(esoph$tobgp))
freq_dist_tobgp <- frequency_tobgp / nrow(esoph)
rnd_tobgp<-round(freq_dist_tobgp, digits = 2)
cbind(frequency_tobgp,rnd_tobgp)
         frequency_tobgp rnd_tobgp
0-9g/day              24      0.27
10-19                 24      0.27
20-29                 20      0.23
30+                   20      0.23

6. Joint frequency of agegp and alcgp as well as alcgp and tobgp in esoph.

#Joint frequency of agegp and alcgp
table(esoph$agegp, esoph$alcgp)
       
        0-39g/day 40-79 80-119 120+
  25-34         4     4      3    4
  35-44         4     4      4    3
  45-54         4     4      4    4
  55-64         4     4      4    4
  65-74         4     3      4    4
  75+           3     4      2    2
#joint frequency of alcgp and tobgp
table(esoph$alcgp, esoph$tobgp)
           
            0-9g/day 10-19 20-29 30+
  0-39g/day        6     6     5   6
  40-79            6     6     6   5
  80-119           6     6     4   5
  120+             6     6     5   4

7. Diamonds dataset

a. display the range of the variables price, carat, depth, and table.

#Range of diamond price
library(ggplot2)
range(diamonds$price)
[1]   326 18823
#Range of diamond carat
range(diamonds$carat)
[1] 0.20 5.01
#Range of diamond depth
range(diamonds$depth)
[1] 43 79
#Range of diamond table
range(diamonds$table)
[1] 43 95

b. Report the grouped frequency of any two of the variables in 7.a.

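#Side-by-side listing of the raw diamond price and diamond carat values
#(note: this lists individual observations; a grouped frequency would first bin
#each variable into intervals, e.g. with cut(), and then count with table())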
library(dplyr)
tbl_df(cbind(diamonds$price, diamonds$carat))
# A tibble: 53,940 x 2
      V1    V2
   <dbl> <dbl>
 1   326  0.23
 2   326  0.21
 3   327  0.23
 4   334  0.29
 5   335  0.31
 6   336  0.24
 7   336  0.24
 8   337  0.26
 9   337  0.22
10   338  0.23
# ... with 53,930 more rows

c. Plot a histogram for all four variables identified in 7.a.