library(datasets)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Column-wise preview of mtcars: one line per variable with its type and
# the first few values.
mtcars %>% glimpse()
## Observations: 32
## Variables: 11
## $ mpg  <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19....
## $ cyl  <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, ...
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 1...
## $ hp   <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, ...
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.9...
## $ wt   <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3...
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 2...
## $ vs   <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ am   <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, ...
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, ...
# Open the documentation page for mtcars (requested twice, as in the
# original session), make sure the dataset is loaded, then show the first
# six rows (six is head()'s default, spelled out here).
utils::help("mtcars")
help("mtcars")
data("mtcars")
head(mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Last six rows of mtcars (six is tail()'s default, spelled out here).
tail(mtcars, n = 6L)
##                 mpg cyl  disp  hp drat    wt qsec vs am gear carb
## Porsche 914-2  26.0   4 120.3  91 4.43 2.140 16.7  0  1    5    2
## Lotus Europa   30.4   4  95.1 113 3.77 1.513 16.9  1  1    5    2
## Ford Pantera L 15.8   8 351.0 264 4.22 3.170 14.5  0  1    5    4
## Ferrari Dino   19.7   6 145.0 175 3.62 2.770 15.5  0  1    5    6
## Maserati Bora  15.0   8 301.0 335 3.54 3.570 14.6  0  1    5    8
## Volvo 142E     21.4   4 121.0 109 4.11 2.780 18.6  1  1    4    2
# Compact structure listing: every one of the 11 columns is numeric.
utils::str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Min / quartiles / median / mean / max for every column.
summary(object = mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# Raw miles-per-gallon values, one per car.
mtcars[["mpg"]]
##  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
## [29] 15.8 19.7 15.0 21.4
# Stem-and-leaf display of the same values (text-mode histogram).
stem(mtcars[["mpg"]])
## 
##   The decimal point is at the |
## 
##   10 | 44
##   12 | 3
##   14 | 3702258
##   16 | 438
##   18 | 17227
##   20 | 00445
##   22 | 88
##   24 | 4
##   26 | 03
##   28 | 
##   30 | 44
##   32 | 49
# Tukey's five-number summary versus quantile()'s default quartiles. The
# lower values differ slightly (15.35 vs 15.425) because fivenum() reports
# hinges while quantile() interpolates.
fivenum(mtcars[["mpg"]])
## [1] 10.40 15.35 19.20 22.80 33.90
quantile(mtcars[["mpg"]])
##     0%    25%    50%    75%   100% 
## 10.400 15.425 19.200 22.800 33.900
# Open the documentation page for the esoph dataset (requested twice, as in
# the original session).
utils::help("esoph")
help("esoph")

# Per-column summary of mtcars (same call as earlier in the script).
summary(object = mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# Structure of mtcars: 32 observations of 11 numeric variables.
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Summaries of individual columns, one vector at a time.
summary(mtcars$mpg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.40   15.42   19.20   20.09   22.80   33.90
summary(mtcars$cyl)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.000   4.000   6.000   6.188   8.000   8.000
summary(mtcars$disp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    71.1   120.8   196.3   230.7   326.0   472.0
# Summarise several columns in one call. summary() on a vector only
# summarises its first argument, so passing three vectors silently reports
# mpg alone; select the columns as a data frame instead.
summary(mtcars[, c("mpg", "cyl", "disp")])
##       mpg             cyl             disp      
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8  
##  Median :19.20   Median :6.000   Median :196.3  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0  
# esoph tables ----

utils::help("esoph")
# Tibble copy of esoph. NOTE(review): esoph_df is not referenced in the
# visible part of this script, and tbl_df() is reported as deprecated in
# newer dplyr releases -- confirm before upgrading.
esoph_df <- esoph %>% tbl_df()
# Three-way contingency table: age group x alcohol group x tobacco group.
# table() counts data-frame rows per combination, which is why every printed
# cell is 0 or 1. NOTE(review): any per-row count columns esoph may carry are
# not used as weights here -- confirm that row counts are what is wanted.
e.freq <- table(esoph[["agegp"]], esoph[["alcgp"]], esoph[["tobgp"]])
print(e.freq)
## , ,  = 0-9g/day
## 
##        
##         0-39g/day 40-79 80-119 120+
##   25-34         1     1      1    1
##   35-44         1     1      1    1
##   45-54         1     1      1    1
##   55-64         1     1      1    1
##   65-74         1     1      1    1
##   75+           1     1      1    1
## 
## , ,  = 10-19
## 
##        
##         0-39g/day 40-79 80-119 120+
##   25-34         1     1      1    1
##   35-44         1     1      1    1
##   45-54         1     1      1    1
##   55-64         1     1      1    1
##   65-74         1     1      1    1
##   75+           1     1      1    1
## 
## , ,  = 20-29
## 
##        
##         0-39g/day 40-79 80-119 120+
##   25-34         1     1      0    1
##   35-44         1     1      1    1
##   45-54         1     1      1    1
##   55-64         1     1      1    1
##   65-74         1     1      1    1
##   75+           0     1      0    0
## 
## , ,  = 30+
## 
##        
##         0-39g/day 40-79 80-119 120+
##   25-34         1     1      1    1
##   35-44         1     1      1    0
##   45-54         1     1      1    1
##   55-64         1     1      1    1
##   65-74         1     0      1    1
##   75+           1     1      0    0
# Relative frequencies: each cell count as a share of the number of rows
# in esoph.
e.relfreq <- e.freq / NROW(esoph)
print(e.relfreq)
## , ,  = 0-9g/day
## 
##        
##          0-39g/day      40-79     80-119       120+
##   25-34 0.01136364 0.01136364 0.01136364 0.01136364
##   35-44 0.01136364 0.01136364 0.01136364 0.01136364
##   45-54 0.01136364 0.01136364 0.01136364 0.01136364
##   55-64 0.01136364 0.01136364 0.01136364 0.01136364
##   65-74 0.01136364 0.01136364 0.01136364 0.01136364
##   75+   0.01136364 0.01136364 0.01136364 0.01136364
## 
## , ,  = 10-19
## 
##        
##          0-39g/day      40-79     80-119       120+
##   25-34 0.01136364 0.01136364 0.01136364 0.01136364
##   35-44 0.01136364 0.01136364 0.01136364 0.01136364
##   45-54 0.01136364 0.01136364 0.01136364 0.01136364
##   55-64 0.01136364 0.01136364 0.01136364 0.01136364
##   65-74 0.01136364 0.01136364 0.01136364 0.01136364
##   75+   0.01136364 0.01136364 0.01136364 0.01136364
## 
## , ,  = 20-29
## 
##        
##          0-39g/day      40-79     80-119       120+
##   25-34 0.01136364 0.01136364 0.00000000 0.01136364
##   35-44 0.01136364 0.01136364 0.01136364 0.01136364
##   45-54 0.01136364 0.01136364 0.01136364 0.01136364
##   55-64 0.01136364 0.01136364 0.01136364 0.01136364
##   65-74 0.01136364 0.01136364 0.01136364 0.01136364
##   75+   0.00000000 0.01136364 0.00000000 0.00000000
## 
## , ,  = 30+
## 
##        
##          0-39g/day      40-79     80-119       120+
##   25-34 0.01136364 0.01136364 0.01136364 0.01136364
##   35-44 0.01136364 0.01136364 0.01136364 0.00000000
##   45-54 0.01136364 0.01136364 0.01136364 0.01136364
##   55-64 0.01136364 0.01136364 0.01136364 0.01136364
##   65-74 0.01136364 0.00000000 0.01136364 0.01136364
##   75+   0.01136364 0.01136364 0.00000000 0.00000000
# Same relative frequencies, rounded to two decimal places for display.
round.e.relfreq <- round(e.relfreq, 2)
print(round.e.relfreq)
## , ,  = 0-9g/day
## 
##        
##         0-39g/day 40-79 80-119 120+
##   25-34      0.01  0.01   0.01 0.01
##   35-44      0.01  0.01   0.01 0.01
##   45-54      0.01  0.01   0.01 0.01
##   55-64      0.01  0.01   0.01 0.01
##   65-74      0.01  0.01   0.01 0.01
##   75+        0.01  0.01   0.01 0.01
## 
## , ,  = 10-19
## 
##        
##         0-39g/day 40-79 80-119 120+
##   25-34      0.01  0.01   0.01 0.01
##   35-44      0.01  0.01   0.01 0.01
##   45-54      0.01  0.01   0.01 0.01
##   55-64      0.01  0.01   0.01 0.01
##   65-74      0.01  0.01   0.01 0.01
##   75+        0.01  0.01   0.01 0.01
## 
## , ,  = 20-29
## 
##        
##         0-39g/day 40-79 80-119 120+
##   25-34      0.01  0.01   0.00 0.01
##   35-44      0.01  0.01   0.01 0.01
##   45-54      0.01  0.01   0.01 0.01
##   55-64      0.01  0.01   0.01 0.01
##   65-74      0.01  0.01   0.01 0.01
##   75+        0.00  0.01   0.00 0.00
## 
## , ,  = 30+
## 
##        
##         0-39g/day 40-79 80-119 120+
##   25-34      0.01  0.01   0.01 0.01
##   35-44      0.01  0.01   0.01 0.00
##   45-54      0.01  0.01   0.01 0.01
##   55-64      0.01  0.01   0.01 0.01
##   65-74      0.01  0.00   0.01 0.01
##   75+        0.01  0.01   0.00 0.00
# Two-way table of age group (rows) by alcohol group (columns).
# The column is spelled out in full as `alcgp`: the shorter `esoph$alcg`
# only worked through `$` partial matching, which is fragile and is not
# supported by tibbles.
acrosstab1 <- table(esoph$agegp, esoph$alcgp)
acrosstab1
##        
##         0-39g/day 40-79 80-119 120+
##   25-34         4     4      3    4
##   35-44         4     4      4    3
##   45-54         4     4      4    4
##   55-64         4     4      4    4
##   65-74         4     3      4    4
##   75+           3     4      2    2
# Flat (ftable) rendering of the same counts.
ftable(acrosstab1)
##        0-39g/day 40-79 80-119 120+
##                                   
## 25-34          4     4      3    4
## 35-44          4     4      4    3
## 45-54          4     4      4    4
## 55-64          4     4      4    4
## 65-74          4     3      4    4
## 75+            3     4      2    2
# Same age-by-alcohol cross-tabulation built with the formula interface.
# The column is spelled out in full as `alcgp`: `esoph$alcg` relied on `$`
# partial matching and also leaked the truncated name into the table's
# dimension label. The recorded output below reflects the corrected label.
anotheronextab1 <- xtabs(~ esoph$agegp + esoph$alcgp)
anotheronextab1
##            esoph$alcgp
## esoph$agegp 0-39g/day 40-79 80-119 120+
##       25-34         4     4      3    4
##       35-44         4     4      4    3
##       45-54         4     4      4    4
##       55-64         4     4      4    4
##       65-74         4     3      4    4
##       75+           3     4      2    2
# Flat (ftable) rendering of the same counts.
ftable(anotheronextab1)
##             esoph$alcgp 0-39g/day 40-79 80-119 120+
## esoph$agegp                                        
## 25-34                           4     4      3    4
## 35-44                           4     4      4    3
## 45-54                           4     4      4    4
## 55-64                           4     4      4    4
## 65-74                           4     3      4    4
## 75+                             3     4      2    2
# Two-way table of alcohol group (rows) by tobacco group (columns).
acrosstab2 <- table(esoph[["alcgp"]], esoph[["tobgp"]])
print(acrosstab2)
##            
##             0-9g/day 10-19 20-29 30+
##   0-39g/day        6     6     5   6
##   40-79            6     6     6   5
##   80-119           6     6     4   5
##   120+             6     6     5   4
# Flat (ftable) rendering of the same counts.
ftable(x = acrosstab2)
##            0-9g/day 10-19 20-29 30+
##                                    
## 0-39g/day         6     6     5   6
## 40-79             6     6     6   5
## 80-119            6     6     4   5
## 120+              6     6     5   4
# Same cross-tabulation through the formula interface, which labels both
# dimensions with the expressions used in the formula.
anotheronextab2 <- xtabs(formula = ~ esoph$alcgp + esoph$tobgp)
print(anotheronextab2)
##            esoph$tobgp
## esoph$alcgp 0-9g/day 10-19 20-29 30+
##   0-39g/day        6     6     5   6
##   40-79            6     6     6   5
##   80-119           6     6     4   5
##   120+             6     6     5   4
ftable(x = anotheronextab2)
##             esoph$tobgp 0-9g/day 10-19 20-29 30+
## esoph$alcgp                                     
## 0-39g/day                      6     6     5   6
## 40-79                          6     6     6   5
## 80-119                         6     6     4   5
## 120+                           6     6     5   4
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
# Open the documentation page for the diamonds dataset, then print a
# per-column summary (numeric quantiles and factor level counts).
help("diamonds")
summary(object = diamonds)
##      carat               cut        color        clarity     
##  Min.   :0.2000   Fair     : 1610   D: 6775   SI1    :13065  
##  1st Qu.:0.4000   Good     : 4906   E: 9797   VS2    :12258  
##  Median :0.7000   Very Good:12082   F: 9542   SI2    : 9194  
##  Mean   :0.7979   Premium  :13791   G:11292   VS1    : 8171  
##  3rd Qu.:1.0400   Ideal    :21551   H: 8304   VVS2   : 5066  
##  Max.   :5.0100                     I: 5422   VVS1   : 3655  
##                                     J: 2808   (Other): 2531  
##      depth           table           price             x         
##  Min.   :43.00   Min.   :43.00   Min.   :  326   Min.   : 0.000  
##  1st Qu.:61.00   1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710  
##  Median :61.80   Median :57.00   Median : 2401   Median : 5.700  
##  Mean   :61.75   Mean   :57.46   Mean   : 3933   Mean   : 5.731  
##  3rd Qu.:62.50   3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540  
##  Max.   :79.00   Max.   :95.00   Max.   :18823   Max.   :10.740  
##                                                                  
##        y                z         
##  Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 4.720   1st Qu.: 2.910  
##  Median : 5.710   Median : 3.530  
##  Mean   : 5.735   Mean   : 3.539  
##  3rd Qu.: 6.540   3rd Qu.: 4.040  
##  Max.   :58.900   Max.   :31.800  
## 
# Bin diamond prices into fixed-width classes and tabulate the classes.
d.price <- diamonds$price
range(d.price)
## [1]   326 18823
# Breaks start just below the minimum price and advance in steps of 1600.
# The upper limit is derived from the data so that the last break lies
# strictly above the maximum price. With a fixed limit of 19000 the breaks
# stopped at 17910, and because the intervals are left-closed
# (right = FALSE) every price at or above 17910 became NA and silently
# dropped out of the frequency table.
d.price_break <- seq(310, max(d.price) + 1600, by = 1600)
d.price_break
##  [1]   310  1910  3510  5110  6710  8310  9910 11510 13110 14710 16310
## [12] 17910 19510
d.price.cut <- cut(d.price, d.price_break, right = FALSE)
# Guard: every price must fall inside one of the classes.
stopifnot(!anyNA(d.price.cut))

d.price.freq <- table(d.price.cut)
d.price.freq
## d.price.cut
##      [310,1.91e+03) [1.91e+03,3.51e+03) [3.51e+03,5.11e+03) 
##               23499                9034                7123 
## [5.11e+03,6.71e+03) [6.71e+03,8.31e+03) [8.31e+03,9.91e+03) 
##                4436                2647                1902 
## [9.91e+03,1.15e+04) [1.15e+04,1.31e+04) [1.31e+04,1.47e+04) 
##                1469                1136                 913 
## [1.47e+04,1.63e+04) [1.63e+04,1.79e+04) [1.79e+04,1.95e+04) 
##                 772                 669                 340
# Same frequencies as a one-column matrix, one class per row.
cbind(d.price.freq)
##                     d.price.freq
## [310,1.91e+03)             23499
## [1.91e+03,3.51e+03)         9034
## [3.51e+03,5.11e+03)         7123
## [5.11e+03,6.71e+03)         4436
## [6.71e+03,8.31e+03)         2647
## [8.31e+03,9.91e+03)         1902
## [9.91e+03,1.15e+04)         1469
## [1.15e+04,1.31e+04)         1136
## [1.31e+04,1.47e+04)          913
## [1.47e+04,1.63e+04)          772
## [1.63e+04,1.79e+04)          669
## [1.79e+04,1.95e+04)          340
# Histograms of four numeric diamonds columns using hist()'s default
# binning. The calls are kept exactly as written because hist() builds the
# plot title and axis label from the argument expression.
hist(diamonds$price)

hist(diamonds$carat)

hist(diamonds$depth)

hist(diamonds$table)

# Minimum and maximum of the carat, depth and table columns.
range(diamonds$carat)
## [1] 0.20 5.01
range(diamonds$depth)
## [1] 43 79
range(diamonds$table)
## [1] 43 95