library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Compactly display the structure of the built-in mtcars data frame
# (column names, storage types and the first few values of each column).
utils::str(object = mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# cyl, disp, hp, vs, am, gear and carb are all discrete variables.
# mpg, drat, wt and qsec are all continuous variables.
# Six-number summaries (min, quartiles, median, mean, max) of three
# mtcars columns: qsec, drat and wt. Each summary is stored, then printed.

# qsec
qsec_summary <- summary(mtcars[["qsec"]])
print(qsec_summary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.50 16.89 17.71 17.85 18.90 22.90

# drat
drat_summary <- summary(mtcars[["drat"]])
print(drat_summary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.760 3.080 3.695 3.597 3.920 4.930

# wt (stored under the name weight_summary)
weight_summary <- summary(mtcars[["wt"]])
print(weight_summary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.513 2.581 3.325 3.217 3.610 5.424
# Check the class of the three grouping columns of the built-in esoph
# data set; each one prints as an ordered factor.
class(esoph[["agegp"]])
## [1] "ordered" "factor"
class(esoph[["alcgp"]])
## [1] "ordered" "factor"
class(esoph[["tobgp"]])
## [1] "ordered" "factor"
# Frequency distribution of the age groups in esoph.

# Absolute frequencies: number of rows per age group.
freqagegp <- table(esoph[["agegp"]])
print(freqagegp)
##
## 25-34 35-44 45-54 55-64 65-74 75+
## 15 15 16 16 15 11

# Relative frequencies: share of all esoph rows, rounded to 2 decimals.
relfreqagegp <- freqagegp / nrow(esoph)
rnd_relfreqagegp <- round(relfreqagegp, digits = 2)
print(rnd_relfreqagegp)
##
## 25-34 35-44 45-54 55-64 65-74 75+
## 0.17 0.17 0.18 0.18 0.17 0.12

# Side-by-side view; the column headers are taken from the variable names.
cbind(freqagegp, rnd_relfreqagegp)
## freqagegp rnd_relfreqagegp
## 25-34 15 0.17
## 35-44 15 0.17
## 45-54 16 0.18
## 55-64 16 0.18
## 65-74 15 0.17
## 75+ 11 0.12
# Frequency distribution of the alcohol-consumption groups in esoph.

# Absolute frequencies: number of rows per alcohol group.
freqalcgp <- table(esoph[["alcgp"]])
print(freqalcgp)
##
## 0-39g/day 40-79 80-119 120+
## 23 23 21 21

# Relative frequencies: share of all esoph rows, rounded to 2 decimals.
relfreqalcgp <- freqalcgp / nrow(esoph)
rnd_relfreqalcgp <- round(relfreqalcgp, digits = 2)
print(rnd_relfreqalcgp)
##
## 0-39g/day 40-79 80-119 120+
## 0.26 0.26 0.24 0.24

# Side-by-side view; the column headers are taken from the variable names.
cbind(freqalcgp, rnd_relfreqalcgp)
## freqalcgp rnd_relfreqalcgp
## 0-39g/day 23 0.26
## 40-79 23 0.26
## 80-119 21 0.24
## 120+ 21 0.24
# Frequency distribution of the tobacco-consumption groups in esoph.

# Absolute frequencies: number of rows per tobacco group.
freqtobgp <- table(esoph[["tobgp"]])
print(freqtobgp)
##
## 0-9g/day 10-19 20-29 30+
## 24 24 20 20

# Relative frequencies: share of all esoph rows, rounded to 2 decimals.
relfreqtobgp <- freqtobgp / nrow(esoph)
rnd_relfreqtobgp <- round(relfreqtobgp, digits = 2)
print(rnd_relfreqtobgp)
##
## 0-9g/day 10-19 20-29 30+
## 0.27 0.27 0.23 0.23

# Side-by-side view; the column headers are taken from the variable names.
cbind(freqtobgp, rnd_relfreqtobgp)
## freqtobgp rnd_relfreqtobgp
## 0-9g/day 24 0.27
## 10-19 24 0.27
## 20-29 20 0.23
## 30+ 20 0.23
# Two-way contingency tables of the esoph grouping factors.
# Rows are the first factor, columns the second.

# Age group by alcohol group.
table(esoph[["agegp"]], esoph[["alcgp"]])
##
## 0-39g/day 40-79 80-119 120+
## 25-34 4 4 3 4
## 35-44 4 4 4 3
## 45-54 4 4 4 4
## 55-64 4 4 4 4
## 65-74 4 3 4 4
## 75+ 3 4 2 2

# Alcohol group by tobacco group.
table(esoph[["alcgp"]], esoph[["tobgp"]])
##
## 0-9g/day 10-19 20-29 30+
## 0-39g/day 6 6 5 6
## 40-79 6 6 6 5
## 80-119 6 6 4 5
## 120+ 6 6 5 4
library(ggplot2)
# Minimum and maximum of four numeric columns of the diamonds data set.
range(diamonds[["price"]])
## [1] 326 18823
range(diamonds[["carat"]])
## [1] 0.20 5.01
range(diamonds[["depth"]])
## [1] 43 79
range(diamonds[["table"]])
## [1] 43 95
library(ggplot2)
# Grouped frequency distribution of diamond prices.
price <- diamonds$price
range(price)
## [1] 326 18823

# Five equal-width bins of 4000; 0-20000 covers the observed range above.
price_break <- seq(0, 20000, by = 4000)
price_break
## [1] 0 4000 8000 12000 16000 20000

# right = FALSE makes the bins left-closed, [a, b).
# dig.lab = 5 keeps the interval labels as plain integers; with the default
# of 3 significant digits they are rendered as "[4e+03,8e+03)" and so on.
price.cut <- cut(price, price_break, right = FALSE, dig.lab = 5)
price.freq <- table(price.cut)
price.freq
## price.cut
## [0,4000) [4000,8000) [8000,12000) [12000,16000) [16000,20000)
## 34560 11774 4142 2322 1142

# Same counts shown as a one-column matrix.
cbind(price.freq)
## price.freq
## [0,4000) 34560
## [4000,8000) 11774
## [8000,12000) 4142
## [12000,16000) 2322
## [16000,20000) 1142
library(ggplot2)
# Grouped frequency distribution of the diamonds depth column.
depth <- diamonds[["depth"]]
range(depth)
## [1] 43 79

# Four equal-width bins of 20 spanning 0-80.
# NOTE(review): the observed range is 43-79, so the two lowest bins are
# always empty; narrower breaks starting near 40 may be more informative.
depth_break <- seq(from = 0, to = 80, by = 20)
depth_break
## [1] 0 20 40 60 80

# Left-closed bins, [a, b).
depth.cut <- cut(depth, breaks = depth_break, right = FALSE)
depth.freq <- table(depth.cut)
depth.freq
## depth.cut
## [0,20) [20,40) [40,60) [60,80)
## 0 0 5114 48826

# Same counts shown as a one-column matrix.
cbind(depth.freq)
## depth.freq
## [0,20) 0
## [20,40) 0
## [40,60) 5114
## [60,80) 48826
# Draw one histogram per numeric diamonds column examined above
# (price, carat, depth, table). No breaks, titles or axis labels are
# supplied, so every plot uses hist()'s defaults.
hist(diamonds$price)
hist(diamonds$carat)
hist(diamonds$depth)
hist(diamonds$table)