library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

1. Identify the data type of each variable in “mtcars”

# Compact overview of mtcars: its dimensions, then one line per column giving
# the storage type and the first few values.
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

2. Report your classification of each variable in “mtcars”

cyl, disp, hp, vs, am, gear, and carb are all discrete variables.
mpg, drat, wt, and qsec are all continuous variables.

3. Report the distribution of three variables in “mtcars”

Summary of qsec
# Min, quartiles, median, mean and max of the qsec column; stored, then shown.
qsec_summary <- summary(mtcars[["qsec"]])
print(qsec_summary)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.50   16.89   17.71   17.85   18.90   22.90
Summary of drat
# Min, quartiles, median, mean and max of the drat column; stored, then shown.
drat_summary <- summary(mtcars[["drat"]])
print(drat_summary)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.760   3.080   3.695   3.597   3.920   4.930
Summary of weight
# Min, quartiles, median, mean and max of the wt column; stored, then shown.
weight_summary <- summary(mtcars[["wt"]])
print(weight_summary)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.513   2.581   3.325   3.217   3.610   5.424

4. Identify the data types of agegp, alcgp, and tobgp in esoph

# Class vector of each grouping column in esoph, one column per call.
class(esoph[["agegp"]])
## [1] "ordered" "factor"
class(esoph[["alcgp"]])
## [1] "ordered" "factor"
class(esoph[["tobgp"]])
## [1] "ordered" "factor"

5. Report the frequency and relative frequency distributions of agegp, alcgp, and tobgp

# Frequency and relative-frequency distribution of agegp.
freqagegp <- table(esoph$agegp)
freqagegp
## 
## 25-34 35-44 45-54 55-64 65-74   75+ 
##    15    15    16    16    15    11
# table() leaves missing values out of the counts, so the shares are taken
# over the tabulated total (prop.table) rather than over nrow(esoph); that
# keeps them summing to 1 even if the column ever contains NAs.
relfreqagegp <- prop.table(freqagegp)
rnd_relfreqagegp <- round(relfreqagegp, digits = 2)
rnd_relfreqagegp
## 
## 25-34 35-44 45-54 55-64 65-74   75+ 
##  0.17  0.17  0.18  0.18  0.17  0.12
# Counts and rounded shares side by side, one row per age group.
cbind(freqagegp, rnd_relfreqagegp)
##       freqagegp rnd_relfreqagegp
## 25-34        15             0.17
## 35-44        15             0.17
## 45-54        16             0.18
## 55-64        16             0.18
## 65-74        15             0.17
## 75+          11             0.12
# Frequency and relative-frequency distribution of alcgp.
freqalcgp <- table(esoph$alcgp)
freqalcgp
## 
## 0-39g/day     40-79    80-119      120+ 
##        23        23        21        21
# table() leaves missing values out of the counts, so the shares are taken
# over the tabulated total (prop.table) rather than over nrow(esoph); that
# keeps them summing to 1 even if the column ever contains NAs.
relfreqalcgp <- prop.table(freqalcgp)
rnd_relfreqalcgp <- round(relfreqalcgp, digits = 2)
rnd_relfreqalcgp
## 
## 0-39g/day     40-79    80-119      120+ 
##      0.26      0.26      0.24      0.24
# Counts and rounded shares side by side, one row per alcohol group.
cbind(freqalcgp, rnd_relfreqalcgp)
##           freqalcgp rnd_relfreqalcgp
## 0-39g/day        23             0.26
## 40-79            23             0.26
## 80-119           21             0.24
## 120+             21             0.24
# Frequency and relative-frequency distribution of tobgp.
freqtobgp <- table(esoph$tobgp)
freqtobgp
## 
## 0-9g/day    10-19    20-29      30+ 
##       24       24       20       20
# table() leaves missing values out of the counts, so the shares are taken
# over the tabulated total (prop.table) rather than over nrow(esoph); that
# keeps them summing to 1 even if the column ever contains NAs.
relfreqtobgp <- prop.table(freqtobgp)
rnd_relfreqtobgp <- round(relfreqtobgp, digits = 2)
rnd_relfreqtobgp
## 
## 0-9g/day    10-19    20-29      30+ 
##     0.27     0.27     0.23     0.23
# Counts and rounded shares side by side, one row per tobacco group.
cbind(freqtobgp, rnd_relfreqtobgp)
##          freqtobgp rnd_relfreqtobgp
## 0-9g/day        24             0.27
## 10-19           24             0.27
## 20-29           20             0.23
## 30+             20             0.23

6. Joint tables of agegp with alcgp, and alcgp with tobgp

# Two-way count table: agegp levels as rows, alcgp levels as columns.
table(esoph[["agegp"]], esoph[["alcgp"]])
##        
##         0-39g/day 40-79 80-119 120+
##   25-34         4     4      3    4
##   35-44         4     4      4    3
##   45-54         4     4      4    4
##   55-64         4     4      4    4
##   65-74         4     3      4    4
##   75+           3     4      2    2
# Two-way count table: alcgp levels as rows, tobgp levels as columns.
table(esoph[["alcgp"]], esoph[["tobgp"]])
##            
##             0-9g/day 10-19 20-29 30+
##   0-39g/day        6     6     5   6
##   40-79            6     6     6   5
##   80-119           6     6     4   5
##   120+             6     6     5   4

7a. Display the range of price, carat, depth, and table

library(ggplot2)

# Smallest and largest value of each column, evaluated inside diamonds.
with(diamonds, range(price))
## [1]   326 18823
with(diamonds, range(carat))
## [1] 0.20 5.01
with(diamonds, range(depth))
## [1] 43 79
with(diamonds, range(table))
## [1] 43 95

7b. Report the grouped frequency of price, depth

Price

# Grouped frequency of price in classes 4000 wide.
library(ggplot2)
price <- diamonds$price
range(price)
## [1]   326 18823
# Breaks from 0 to 20000 span the observed range printed above.
price_break <- seq(0, 20000, by = 4000)
price_break
## [1]     0  4000  8000 12000 16000 20000
# right = FALSE makes each class closed on the left: [lower, upper).
# dig.lab = 5 lets the five-digit break values print in full; with cut()'s
# default of 3 significant digits the labels came out in scientific notation
# ([0,4e+03), [1.2e+04,1.6e+04), ...), which is hard to read in a table.
price.cut <- cut(price, price_break, right = FALSE, dig.lab = 5)
price.freq <- table(price.cut)
price.freq
## price.cut
##      [0,4000)   [4000,8000)  [8000,12000) [12000,16000) [16000,20000) 
##         34560         11774          4142          2322          1142
# Same counts as a one-column matrix, one class per row.
cbind(price.freq)
##               price.freq
## [0,4000)           34560
## [4000,8000)        11774
## [8000,12000)        4142
## [12000,16000)       2322
## [16000,20000)       1142

Depth

# Grouped frequency of depth in left-closed classes 20 wide.
library(ggplot2)
depth <- diamonds[["depth"]]
range(depth)
## [1] 43 79
depth_break <- seq(from = 0, to = 80, by = 20)
depth_break
## [1]  0 20 40 60 80
# With these breaks the two lowest classes stay empty, because the smallest
# observed depth (43) already lies in the third class.
depth.cut <- cut(depth, breaks = depth_break, right = FALSE)
depth.freq <- table(depth.cut)
print(depth.freq)
## depth.cut
##  [0,20) [20,40) [40,60) [60,80) 
##       0       0    5114   48826
# Same counts as a one-column matrix, one class per row.
cbind(depth.freq)
##         depth.freq
## [0,20)           0
## [20,40)          0
## [40,60)       5114
## [60,80)      48826

7c. Print histograms for price, carat, depth, and table

# One base-graphics histogram per column, each with hist()'s default binning.
# No main/xlab is passed, so titles and axis labels are hist()'s defaults.
hist(diamonds$price)

hist(diamonds$carat)

hist(diamonds$depth)

hist(diamonds$table)