# Attach the built-in example datasets and dplyr, then convert mtcars to a
# tibble so that printing shows only the first rows plus column types.
library(datasets)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
# as_tibble() (re-exported by dplyr) replaces the deprecated tbl_df().
mtcars <- as_tibble(mtcars)
mtcars
## # A tibble: 32 x 11
## mpg cyl disp hp drat wt qsec vs am gear carb
## * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## # ... with 22 more rows
# tibble is attached here, after mtcars has already been converted above.
library(tibble)
# Per-column summary of mtcars: min, quartiles, median, mean and max.
summary(mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
# Extract single columns of mtcars as plain vectors with `$` and print them.
# The hp column (32 values):
mtcars$hp
## [1] 110 110 93 110 175 105 245 62 95 123 123 180 180 180 205 215 230
## [18] 66 52 65 97 150 150 245 175 66 91 113 264 175 335 109
# The wt column:
mtcars$wt
## [1] 2.620 2.875 2.320 3.215 3.440 3.460 3.570 3.190 3.150 3.440 3.440
## [12] 4.070 3.730 3.780 5.250 5.424 5.345 2.200 1.615 1.835 2.465 3.520
## [23] 3.435 3.840 3.845 1.935 2.140 1.513 3.170 2.770 3.570 2.780
# The carb column:
mtcars$carb
## [1] 4 4 1 1 2 1 4 2 2 4 4 3 3 3 4 4 4 1 2 1 1 2 2 4 2 1 2 2 4 6 8 2
# Convert the built-in esoph data to a tibble for compact printing.
# as_tibble() replaces the deprecated tbl_df().
esoph <- as_tibble(esoph)
esoph
## # A tibble: 88 x 5
## agegp alcgp tobgp ncases ncontrols
## <ord> <ord> <ord> <dbl> <dbl>
## 1 25-34 0-39g/day 0-9g/day 0 40
## 2 25-34 0-39g/day 10-19 0 10
## 3 25-34 0-39g/day 20-29 0 6
## 4 25-34 0-39g/day 30+ 0 5
## 5 25-34 40-79 0-9g/day 0 27
## 6 25-34 40-79 10-19 0 7
## 7 25-34 40-79 20-29 0 4
## 8 25-34 40-79 30+ 0 7
## 9 25-34 80-119 0-9g/day 0 2
## 10 25-34 80-119 10-19 0 1
## # ... with 78 more rows
# Age-group column of esoph (an ordered factor with six levels).
esoph$agegp
## [1] 25-34 25-34 25-34 25-34 25-34 25-34 25-34 25-34 25-34 25-34 25-34
## [12] 25-34 25-34 25-34 25-34 35-44 35-44 35-44 35-44 35-44 35-44 35-44
## [23] 35-44 35-44 35-44 35-44 35-44 35-44 35-44 35-44 45-54 45-54 45-54
## [34] 45-54 45-54 45-54 45-54 45-54 45-54 45-54 45-54 45-54 45-54 45-54
## [45] 45-54 45-54 55-64 55-64 55-64 55-64 55-64 55-64 55-64 55-64 55-64
## [56] 55-64 55-64 55-64 55-64 55-64 55-64 55-64 65-74 65-74 65-74 65-74
## [67] 65-74 65-74 65-74 65-74 65-74 65-74 65-74 65-74 65-74 65-74 65-74
## [78] 75+ 75+ 75+ 75+ 75+ 75+ 75+ 75+ 75+ 75+ 75+
## Levels: 25-34 < 35-44 < 45-54 < 55-64 < 65-74 < 75+
# Absolute frequency: number of rows in each age group.
agegp.freq <- table(esoph$agegp)
agegp.freq
##
## 25-34 35-44 45-54 55-64 65-74 75+
## 15 15 16 16 15 11
# The same counts displayed as a one-column matrix.
cbind(agegp.freq)
## agegp.freq
## 25-34 15
## 35-44 15
## 45-54 16
## 55-64 16
## 65-74 15
## 75+ 11
# Number of distinct age groups (NOT the number of observations).
nrow(agegp.freq)
## [1] 6
# Relative frequency = count / total number of observations.
# prop.table() divides by sum(agegp.freq), so the shares add up to 1.
# Dividing by nrow(agegp.freq), the number of groups, does not give proportions.
agegp.relfreq <- prop.table(agegp.freq)
agegp.relfreq
##
## 25-34 35-44 45-54 55-64 65-74 75+
## 0.1704545 0.1704545 0.1818182 0.1818182 0.1704545 0.1250000
# Relative frequencies rounded to two decimals for display.
round.agegp.relfreq <- round(agegp.relfreq, digits = 2)
round.agegp.relfreq
##
## 25-34 35-44 45-54 55-64 65-74 75+
## 0.17 0.17 0.18 0.18 0.17 0.12
# Alcohol-consumption column of esoph (an ordered factor with four levels).
esoph$alcgp
## [1] 0-39g/day 0-39g/day 0-39g/day 0-39g/day 40-79 40-79 40-79
## [8] 40-79 80-119 80-119 80-119 120+ 120+ 120+
## [15] 120+ 0-39g/day 0-39g/day 0-39g/day 0-39g/day 40-79 40-79
## [22] 40-79 40-79 80-119 80-119 80-119 80-119 120+
## [29] 120+ 120+ 0-39g/day 0-39g/day 0-39g/day 0-39g/day 40-79
## [36] 40-79 40-79 40-79 80-119 80-119 80-119 80-119
## [43] 120+ 120+ 120+ 120+ 0-39g/day 0-39g/day 0-39g/day
## [50] 0-39g/day 40-79 40-79 40-79 40-79 80-119 80-119
## [57] 80-119 80-119 120+ 120+ 120+ 120+ 0-39g/day
## [64] 0-39g/day 0-39g/day 0-39g/day 40-79 40-79 40-79 80-119
## [71] 80-119 80-119 80-119 120+ 120+ 120+ 120+
## [78] 0-39g/day 0-39g/day 0-39g/day 40-79 40-79 40-79 40-79
## [85] 80-119 80-119 120+ 120+
## Levels: 0-39g/day < 40-79 < 80-119 < 120+
# Absolute frequency: number of rows in each alcohol group.
alcgp.freq <- table(esoph$alcgp)
alcgp.freq
##
## 0-39g/day 40-79 80-119 120+
## 23 23 21 21
# The same counts displayed as a one-column matrix.
cbind(alcgp.freq)
## alcgp.freq
## 0-39g/day 23
## 40-79 23
## 80-119 21
## 120+ 21
# Number of distinct alcohol groups (NOT the number of observations).
nrow(alcgp.freq)
## [1] 4
# Relative frequency = count / total number of observations.
# prop.table() divides by sum(alcgp.freq), so the shares add up to 1.
# Dividing by nrow(alcgp.freq), the number of groups, does not give proportions.
alcgp.relfreq <- prop.table(alcgp.freq)
alcgp.relfreq
##
## 0-39g/day 40-79 80-119 120+
## 0.2613636 0.2613636 0.2386364 0.2386364
# Tobacco-consumption column of esoph (an ordered factor with four levels).
esoph$tobgp
## [1] 0-9g/day 10-19 20-29 30+ 0-9g/day 10-19 20-29
## [8] 30+ 0-9g/day 10-19 30+ 0-9g/day 10-19 20-29
## [15] 30+ 0-9g/day 10-19 20-29 30+ 0-9g/day 10-19
## [22] 20-29 30+ 0-9g/day 10-19 20-29 30+ 0-9g/day
## [29] 10-19 20-29 0-9g/day 10-19 20-29 30+ 0-9g/day
## [36] 10-19 20-29 30+ 0-9g/day 10-19 20-29 30+
## [43] 0-9g/day 10-19 20-29 30+ 0-9g/day 10-19 20-29
## [50] 30+ 0-9g/day 10-19 20-29 30+ 0-9g/day 10-19
## [57] 20-29 30+ 0-9g/day 10-19 20-29 30+ 0-9g/day
## [64] 10-19 20-29 30+ 0-9g/day 10-19 20-29 0-9g/day
## [71] 10-19 20-29 30+ 0-9g/day 10-19 20-29 30+
## [78] 0-9g/day 10-19 30+ 0-9g/day 10-19 20-29 30+
## [85] 0-9g/day 10-19 0-9g/day 10-19
## Levels: 0-9g/day < 10-19 < 20-29 < 30+
# Absolute frequency: number of rows in each tobacco group.
tobgp.freq <- table(esoph$tobgp)
tobgp.freq
##
## 0-9g/day 10-19 20-29 30+
## 24 24 20 20
# The same counts displayed as a one-column matrix.
cbind(tobgp.freq)
## tobgp.freq
## 0-9g/day 24
## 10-19 24
## 20-29 20
## 30+ 20
# Number of distinct tobacco groups (NOT the number of observations).
nrow(tobgp.freq)
## [1] 4
# Relative frequency = count / total number of observations.
# prop.table() divides by sum(tobgp.freq), so the shares add up to 1.
# Dividing by nrow(tobgp.freq), the number of groups, does not give proportions.
tobgp.relfreq <- prop.table(tobgp.freq)
tobgp.relfreq
##
## 0-9g/day 10-19 20-29 30+
## 0.2727273 0.2727273 0.2272727 0.2272727
# Cross-tabulate age group (rows, x) against alcohol group (columns, y)
# using the integer level codes of the two ordered factors.
# as.integer() extracts the codes explicitly: c() on a factor returns a
# factor from R 4.1.0 onward, so c() only produced codes on older versions.
# Code i refers to the i-th factor level, e.g. y == 1 is "0-39g/day".
y <- as.integer(esoph$alcgp)
y
## [1] 1 1 1 1 2 2 2 2 3 3 3 4 4 4 4 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 1 1 1 1 2
## [36] 2 2 2 3 3 3 3 4 4 4 4 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 1 1 1 1 2 2 2 3
## [71] 3 3 3 4 4 4 4 1 1 1 2 2 2 2 3 3 4 4
x <- as.integer(esoph$agegp)
x
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3
## [36] 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5
## [71] 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6
# Two-way contingency table: one cell per (age code, alcohol code) pair.
esophtab <- table(x, y)
esophtab
## y
## x 1 2 3 4
## 1 4 4 3 4
## 2 4 4 4 3
## 3 4 4 4 4
## 4 4 4 4 4
## 5 4 3 4 4
## 6 3 4 2 2
# The same table in flat (ftable) layout.
ftable(esophtab)
## y 1 2 3 4
## x
## 1 4 4 3 4
## 2 4 4 4 3
## 3 4 4 4 4
## 4 4 4 4 4
## 5 4 3 4 4
## 6 3 4 2 2
# Cross-tabulate alcohol group (rows, x) against tobacco group (columns, y)
# using the integer level codes of the two ordered factors.
# as.integer() extracts the codes explicitly: c() on a factor returns a
# factor from R 4.1.0 onward, so c() only produced codes on older versions.
# Code i refers to the i-th factor level, e.g. y == 1 is "0-9g/day".
y <- as.integer(esoph$tobgp)
y
## [1] 1 2 3 4 1 2 3 4 1 2 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 1 2 3 4 1
## [36] 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 1
## [71] 2 3 4 1 2 3 4 1 2 4 1 2 3 4 1 2 1 2
x <- as.integer(esoph$alcgp)
x
## [1] 1 1 1 1 2 2 2 2 3 3 3 4 4 4 4 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 1 1 1 1 2
## [36] 2 2 2 3 3 3 3 4 4 4 4 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 1 1 1 1 2 2 2 3
## [71] 3 3 3 4 4 4 4 1 1 1 2 2 2 2 3 3 4 4
# Two-way contingency table: one cell per (alcohol code, tobacco code) pair.
esophtab2 <- table(x, y)
esophtab2
## y
## x 1 2 3 4
## 1 6 6 5 6
## 2 6 6 6 5
## 3 6 6 4 5
## 4 6 6 5 4
# The same table in flat (ftable) layout.
ftable(esophtab2)
## y 1 2 3 4
## x
## 1 6 6 5 6
## 2 6 6 6 5
## 3 6 6 4 5
## 4 6 6 5 4
# DIAMONDS DATASET: inserting the diamonds dataset
# Attach ggplot2; the diamonds data printed below is available once it is loaded.
library(ggplot2)
# Print the dataset: 53,940 rows and 10 columns, first 10 rows shown.
diamonds
## # A tibble: 53,940 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## 7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
## 8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49
## 10 0.23 Very Good H VS1 59.4 61 338 4.00 4.05 2.39
## # ... with 53,930 more rows
# Minimum and maximum of four numeric diamonds columns.
# These ranges guide the bin edges chosen for the frequency tables later on.
range(diamonds$price)
## [1] 326 18823
range(diamonds$carat)
## [1] 0.20 5.01
range(diamonds$depth)
## [1] 43 79
# Note that the maximum of the table column is exactly 95.
range(diamonds$table)
## [1] 43 95
library(tibble)
library(readr)
# diamonds already prints as a tibble above; the conversion is kept so the
# script does not depend on that. as_tibble() replaces the deprecated tbl_df().
diamonds <- as_tibble(diamonds)
diamonds
## # A tibble: 53,940 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## 7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
## 8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49
## 10 0.23 Very Good H VS1 59.4 61 338 4.00 4.05 2.39
## # ... with 53,930 more rows
# Per-column summary: quantiles and mean for numeric columns, level counts for the ordered factors.
summary(diamonds)
## carat cut color clarity
## Min. :0.2000 Fair : 1610 D: 6775 SI1 :13065
## 1st Qu.:0.4000 Good : 4906 E: 9797 VS2 :12258
## Median :0.7000 Very Good:12082 F: 9542 SI2 : 9194
## Mean :0.7979 Premium :13791 G:11292 VS1 : 8171
## 3rd Qu.:1.0400 Ideal :21551 H: 8304 VVS2 : 5066
## Max. :5.0100 I: 5422 VVS1 : 3655
## J: 2808 (Other): 2531
## depth table price x
## Min. :43.00 Min. :43.00 Min. : 326 Min. : 0.000
## 1st Qu.:61.00 1st Qu.:56.00 1st Qu.: 950 1st Qu.: 4.710
## Median :61.80 Median :57.00 Median : 2401 Median : 5.700
## Mean :61.75 Mean :57.46 Mean : 3933 Mean : 5.731
## 3rd Qu.:62.50 3rd Qu.:59.00 3rd Qu.: 5324 3rd Qu.: 6.540
## Max. :79.00 Max. :95.00 Max. :18823 Max. :10.740
##
## y z
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 4.720 1st Qu.: 2.910
## Median : 5.710 Median : 3.530
## Mean : 5.735 Mean : 3.539
## 3rd Qu.: 6.540 3rd Qu.: 4.040
## Max. :58.900 Max. :31.800
##
# Bin edges for depth: 40, 45, ..., 80 (covers the observed range 43-79).
breaks.depth <- seq(from = 40, to = 80, by = 5)
breaks.depth
## [1] 40 45 50 55 60 65 70 75 80
# Assign every depth value to a left-closed, right-open interval [a, b).
depth.cut <- cut(diamonds$depth, breaks = breaks.depth, right = FALSE)
# Count the observations falling into each interval.
depth.freq <- table(depth.cut)
depth.freq
## depth.cut
## [40,45) [45,50) [50,55) [55,60) [60,65) [65,70) [70,75) [75,80)
## 3 0 19 5092 47932 870 21 3
# The same counts displayed as a one-column matrix.
cbind(depth.freq)
## depth.freq
## [40,45) 3
## [45,50) 0
## [50,55) 19
## [55,60) 5092
## [60,65) 47932
## [65,70) 870
## [70,75) 21
## [75,80) 3
# Bin edges for table: 40, 45, ..., 95 (covers the observed range 43-95).
breaks.table <- seq(40, 95, by = 5)
breaks.table
## [1] 40 45 50 55 60 65 70 75 80 85 90 95
# Assign every table value to a left-closed interval [a, b).
# include.lowest = TRUE closes the last interval to [90,95]. Without it the
# maximum value 95 falls outside [90,95), becomes NA and is silently dropped
# by table(), so the counts would sum to 53939 instead of 53940.
table.cut <- cut(diamonds$table, breaks.table, right = FALSE,
                 include.lowest = TRUE)
# Count the observations falling into each interval.
table.freq <- table(table.cut)
table.freq
## table.cut
## [40,45) [45,50) [50,55) [55,60) [60,65) [65,70) [70,75) [75,80) [80,85)
## 2 2 3571 41278 8760 310 14 2 0
## [85,90) [90,95]
## 0 1
# The same counts displayed as a one-column matrix.
cbind(table.freq)
## table.freq
## [40,45) 2
## [45,50) 2
## [50,55) 3571
## [55,60) 41278
## [60,65) 8760
## [65,70) 310
## [70,75) 14
## [75,80) 2
## [80,85) 0
## [85,90) 0
## [90,95] 1
# Draw one histogram per numeric column, using hist()'s default bins.
# Each call is written out separately; hist() takes its default title and
# axis label from the argument expression.
hist(diamonds$price)
hist(diamonds$carat)
hist(diamonds$depth)
hist(diamonds$table)
# Echo the source code of every subsequently evaluated chunk in the knitted output.
# (The stray Markdown code fence that trailed this call made the line unparseable.)
knitr::opts_chunk$set(echo = TRUE)