Loaded Packages

library(dplyr)
library(datasets)
library(tibble)
# tbl_df() is deprecated in dplyr; as_tibble() is the supported equivalent.
mtcars <- as_tibble(mtcars)
mtcars
# A tibble: 32 x 11
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
 * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1  21.0     6 160.0   110  3.90 2.620 16.46     0     1     4     4
 2  21.0     6 160.0   110  3.90 2.875 17.02     0     1     4     4
 3  22.8     4 108.0    93  3.85 2.320 18.61     1     1     4     1
 4  21.4     6 258.0   110  3.08 3.215 19.44     1     0     3     1
 5  18.7     8 360.0   175  3.15 3.440 17.02     0     0     3     2
 6  18.1     6 225.0   105  2.76 3.460 20.22     1     0     3     1
 7  14.3     8 360.0   245  3.21 3.570 15.84     0     0     3     4
 8  24.4     4 146.7    62  3.69 3.190 20.00     1     0     4     2
 9  22.8     4 140.8    95  3.92 3.150 22.90     1     0     4     2
10  19.2     6 167.6   123  3.92 3.440 18.30     1     0     4     4
# ... with 22 more rows

Determining the Classifications of each Variable in MTCARS

mpg=continuous

cyl=discrete

disp=continuous

hp=continuous

drat=continuous

wt=continuous

qsec=continuous

vs=discrete

am=discrete

gear=discrete

carb=discrete

Distribution of Three Variables in MTCARS

summary(mtcars)
      mpg             cyl             disp             hp       
 Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
 1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
 Median :19.20   Median :6.000   Median :196.3   Median :123.0  
 Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
 3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
 Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
      drat             wt             qsec             vs        
 Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
 1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
 Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
 Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
 3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
 Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
       am              gear            carb      
 Min.   :0.0000   Min.   :3.000   Min.   :1.000  
 1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
 Median :0.0000   Median :4.000   Median :2.000  
 Mean   :0.4062   Mean   :3.688   Mean   :2.812  
 3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
 Max.   :1.0000   Max.   :5.000   Max.   :8.000  
mtcars$mpg
 [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
[15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
[29] 15.8 19.7 15.0 21.4
mtcars$cyl
 [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4
mtcars$disp
 [1] 160.0 160.0 108.0 258.0 360.0 225.0 360.0 146.7 140.8 167.6 167.6
[12] 275.8 275.8 275.8 472.0 460.0 440.0  78.7  75.7  71.1 120.1 318.0
[23] 304.0 350.0 400.0  79.0 120.3  95.1 351.0 145.0 301.0 121.0

Data Type of Three Variables in Esoph

# tbl_df() is deprecated in dplyr; as_tibble() is the supported equivalent.
esoph <- as_tibble(esoph)
esoph
# A tibble: 88 x 5
   agegp     alcgp    tobgp ncases ncontrols
   <ord>     <ord>    <ord>  <dbl>     <dbl>
 1 25-34 0-39g/day 0-9g/day      0        40
 2 25-34 0-39g/day    10-19      0        10
 3 25-34 0-39g/day    20-29      0         6
 4 25-34 0-39g/day      30+      0         5
 5 25-34     40-79 0-9g/day      0        27
 6 25-34     40-79    10-19      0         7
 7 25-34     40-79    20-29      0         4
 8 25-34     40-79      30+      0         7
 9 25-34    80-119 0-9g/day      0         2
10 25-34    80-119    10-19      0         1
# ... with 78 more rows

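agegp=discrete (ordinal; stored as an ordered factor)

alcgp=discrete (ordinal; stored as an ordered factor)

tobgp=discrete (ordinal; stored as an ordered factor)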

Frequency Distribution of agegp

esoph$agegp
 [1] 25-34 25-34 25-34 25-34 25-34 25-34 25-34 25-34 25-34 25-34 25-34
[12] 25-34 25-34 25-34 25-34 35-44 35-44 35-44 35-44 35-44 35-44 35-44
[23] 35-44 35-44 35-44 35-44 35-44 35-44 35-44 35-44 45-54 45-54 45-54
[34] 45-54 45-54 45-54 45-54 45-54 45-54 45-54 45-54 45-54 45-54 45-54
[45] 45-54 45-54 55-64 55-64 55-64 55-64 55-64 55-64 55-64 55-64 55-64
[56] 55-64 55-64 55-64 55-64 55-64 55-64 55-64 65-74 65-74 65-74 65-74
[67] 65-74 65-74 65-74 65-74 65-74 65-74 65-74 65-74 65-74 65-74 65-74
[78] 75+   75+   75+   75+   75+   75+   75+   75+   75+   75+   75+  
Levels: 25-34 < 35-44 < 45-54 < 55-64 < 65-74 < 75+
agegp.freq <- table (esoph$agegp)
agegp.freq

25-34 35-44 45-54 55-64 65-74   75+ 
   15    15    16    16    15    11 
cbind(agegp.freq)
      agegp.freq
25-34         15
35-44         15
45-54         16
55-64         16
65-74         15
75+           11

Relative Frequency Distribution of agegp

# Relative frequency = class count / total number of observations, so the
# denominator is sum(agegp.freq) (88 rows), not the number of classes (6).
sum(agegp.freq)
[1] 88
agegp.relfreq <- prop.table(agegp.freq)
agegp.relfreq

    25-34     35-44     45-54     55-64     65-74       75+ 
0.1704545 0.1704545 0.1818182 0.1818182 0.1704545 0.1250000 
round.agegp.relfreq <- round(agegp.relfreq, digits = 2)
round.agegp.relfreq

25-34 35-44 45-54 55-64 65-74   75+ 
 0.17  0.17  0.18  0.18  0.17  0.12 

Frequency Distribution of alcgp

esoph$alcgp
 [1] 0-39g/day 0-39g/day 0-39g/day 0-39g/day 40-79     40-79     40-79    
 [8] 40-79     80-119    80-119    80-119    120+      120+      120+     
[15] 120+      0-39g/day 0-39g/day 0-39g/day 0-39g/day 40-79     40-79    
[22] 40-79     40-79     80-119    80-119    80-119    80-119    120+     
[29] 120+      120+      0-39g/day 0-39g/day 0-39g/day 0-39g/day 40-79    
[36] 40-79     40-79     40-79     80-119    80-119    80-119    80-119   
[43] 120+      120+      120+      120+      0-39g/day 0-39g/day 0-39g/day
[50] 0-39g/day 40-79     40-79     40-79     40-79     80-119    80-119   
[57] 80-119    80-119    120+      120+      120+      120+      0-39g/day
[64] 0-39g/day 0-39g/day 0-39g/day 40-79     40-79     40-79     80-119   
[71] 80-119    80-119    80-119    120+      120+      120+      120+     
[78] 0-39g/day 0-39g/day 0-39g/day 40-79     40-79     40-79     40-79    
[85] 80-119    80-119    120+      120+     
Levels: 0-39g/day < 40-79 < 80-119 < 120+
alcgp.freq <- table(esoph$alcgp)
alcgp.freq

0-39g/day     40-79    80-119      120+ 
       23        23        21        21 
cbind(alcgp.freq)
          alcgp.freq
0-39g/day         23
40-79             23
80-119            21
120+              21

Relative Frequency Distribution of alcgp

# Relative frequency = class count / total number of observations, so the
# denominator is sum(alcgp.freq) (88 rows), not the number of classes (4).
sum(alcgp.freq)
[1] 88
alcgp.relfreq <- prop.table(alcgp.freq)
alcgp.relfreq

0-39g/day     40-79    80-119      120+ 
0.2613636 0.2613636 0.2386364 0.2386364 

Frequency Distribution of tobgp

esoph$tobgp
 [1] 0-9g/day 10-19    20-29    30+      0-9g/day 10-19    20-29   
 [8] 30+      0-9g/day 10-19    30+      0-9g/day 10-19    20-29   
[15] 30+      0-9g/day 10-19    20-29    30+      0-9g/day 10-19   
[22] 20-29    30+      0-9g/day 10-19    20-29    30+      0-9g/day
[29] 10-19    20-29    0-9g/day 10-19    20-29    30+      0-9g/day
[36] 10-19    20-29    30+      0-9g/day 10-19    20-29    30+     
[43] 0-9g/day 10-19    20-29    30+      0-9g/day 10-19    20-29   
[50] 30+      0-9g/day 10-19    20-29    30+      0-9g/day 10-19   
[57] 20-29    30+      0-9g/day 10-19    20-29    30+      0-9g/day
[64] 10-19    20-29    30+      0-9g/day 10-19    20-29    0-9g/day
[71] 10-19    20-29    30+      0-9g/day 10-19    20-29    30+     
[78] 0-9g/day 10-19    30+      0-9g/day 10-19    20-29    30+     
[85] 0-9g/day 10-19    0-9g/day 10-19   
Levels: 0-9g/day < 10-19 < 20-29 < 30+
tobgp.freq <- table (esoph$tobgp)
tobgp.freq

0-9g/day    10-19    20-29      30+ 
      24       24       20       20 
cbind(tobgp.freq)
         tobgp.freq
0-9g/day         24
10-19            24
20-29            20
30+              20

Relative Frequency Distribution of tobgp

# Relative frequency = class count / total number of observations, so the
# denominator is sum(tobgp.freq) (88 rows), not the number of classes (4).
sum(tobgp.freq)
[1] 88
tobgp.relfreq <- prop.table(tobgp.freq)
tobgp.relfreq

 0-9g/day     10-19     20-29       30+ 
0.2727273 0.2727273 0.2272727 0.2272727 

Joint Frequency of agegp and alcgp

# Cross-tabulate the ordered factors directly so rows and columns keep their
# level labels; the argument names label the two dimensions of the table.
esophtab <- table(agegp = esoph$agegp, alcgp = esoph$alcgp)
esophtab
       alcgp
agegp   0-39g/day 40-79 80-119 120+
  25-34         4     4      3    4
  35-44         4     4      4    3
  45-54         4     4      4    4
  55-64         4     4      4    4
  65-74         4     3      4    4
  75+           3     4      2    2
ftable(esophtab)
      alcgp 0-39g/day 40-79 80-119 120+
agegp                                  
25-34               4     4      3    4
35-44               4     4      4    3
45-54               4     4      4    4
55-64               4     4      4    4
65-74               4     3      4    4
75+                 3     4      2    2

Joint Frequency of alcgp and tobgp

# Cross-tabulate the ordered factors directly so rows and columns keep their
# level labels; the argument names label the two dimensions of the table.
esophtab2 <- table(alcgp = esoph$alcgp, tobgp = esoph$tobgp)
esophtab2
           tobgp
alcgp       0-9g/day 10-19 20-29 30+
  0-39g/day        6     6     5   6
  40-79            6     6     6   5
  80-119           6     6     4   5
  120+             6     6     5   4
ftable(esophtab2)
          tobgp 0-9g/day 10-19 20-29 30+
alcgp                                   
0-39g/day              6     6     5   6
40-79                  6     6     6   5
80-119                 6     6     4   5
120+                   6     6     5   4

Diamonds Dataset

library(ggplot2)
diamonds
# A tibble: 53,940 x 10
   carat       cut color clarity depth table price     x     y     z
   <dbl>     <ord> <ord>   <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
 1  0.23     Ideal     E     SI2  61.5    55   326  3.95  3.98  2.43
 2  0.21   Premium     E     SI1  59.8    61   326  3.89  3.84  2.31
 3  0.23      Good     E     VS1  56.9    65   327  4.05  4.07  2.31
 4  0.29   Premium     I     VS2  62.4    58   334  4.20  4.23  2.63
 5  0.31      Good     J     SI2  63.3    58   335  4.34  4.35  2.75
 6  0.24 Very Good     J    VVS2  62.8    57   336  3.94  3.96  2.48
 7  0.24 Very Good     I    VVS1  62.3    57   336  3.95  3.98  2.47
 8  0.26 Very Good     H     SI1  61.9    55   337  4.07  4.11  2.53
 9  0.22      Fair     E     VS2  65.1    61   337  3.87  3.78  2.49
10  0.23 Very Good     H     VS1  59.4    61   338  4.00  4.05  2.39
# ... with 53,930 more rows

Range of Price

range(diamonds$price)
[1]   326 18823

Range of Carat

range(diamonds$carat)
[1] 0.20 5.01

Range of Depth

range(diamonds$depth)
[1] 43 79

Range of Table

range(diamonds$table)
[1] 43 95

Grouped Frequency of Depth in Diamonds

# tbl_df() is deprecated in dplyr; as_tibble() is the supported equivalent.
diamonds <- as_tibble(diamonds)
diamonds
# A tibble: 53,940 x 10
   carat       cut color clarity depth table price     x     y     z
   <dbl>     <ord> <ord>   <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
 1  0.23     Ideal     E     SI2  61.5    55   326  3.95  3.98  2.43
 2  0.21   Premium     E     SI1  59.8    61   326  3.89  3.84  2.31
 3  0.23      Good     E     VS1  56.9    65   327  4.05  4.07  2.31
 4  0.29   Premium     I     VS2  62.4    58   334  4.20  4.23  2.63
 5  0.31      Good     J     SI2  63.3    58   335  4.34  4.35  2.75
 6  0.24 Very Good     J    VVS2  62.8    57   336  3.94  3.96  2.48
 7  0.24 Very Good     I    VVS1  62.3    57   336  3.95  3.98  2.47
 8  0.26 Very Good     H     SI1  61.9    55   337  4.07  4.11  2.53
 9  0.22      Fair     E     VS2  65.1    61   337  3.87  3.78  2.49
10  0.23 Very Good     H     VS1  59.4    61   338  4.00  4.05  2.39
# ... with 53,930 more rows
summary(diamonds)
     carat               cut        color        clarity     
 Min.   :0.2000   Fair     : 1610   D: 6775   SI1    :13065  
 1st Qu.:0.4000   Good     : 4906   E: 9797   VS2    :12258  
 Median :0.7000   Very Good:12082   F: 9542   SI2    : 9194  
 Mean   :0.7979   Premium  :13791   G:11292   VS1    : 8171  
 3rd Qu.:1.0400   Ideal    :21551   H: 8304   VVS2   : 5066  
 Max.   :5.0100                     I: 5422   VVS1   : 3655  
                                    J: 2808   (Other): 2531  
     depth           table           price             x         
 Min.   :43.00   Min.   :43.00   Min.   :  326   Min.   : 0.000  
 1st Qu.:61.00   1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710  
 Median :61.80   Median :57.00   Median : 2401   Median : 5.700  
 Mean   :61.75   Mean   :57.46   Mean   : 3933   Mean   : 5.731  
 3rd Qu.:62.50   3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540  
 Max.   :79.00   Max.   :95.00   Max.   :18823   Max.   :10.740  
                                                                 
       y                z         
 Min.   : 0.000   Min.   : 0.000  
 1st Qu.: 4.720   1st Qu.: 2.910  
 Median : 5.710   Median : 3.530  
 Mean   : 5.735   Mean   : 3.539  
 3rd Qu.: 6.540   3rd Qu.: 4.040  
 Max.   :58.900   Max.   :31.800  
                                  
# Bin depth into left-closed 5-unit classes [40,45), ..., [75,80) and count
# the observations falling in each class.
breaks.depth <- seq(from = 40, to = 80, by = 5)
breaks.depth
[1] 40 45 50 55 60 65 70 75 80
depth.cut <- cut(diamonds$depth, breaks = breaks.depth, right = FALSE)
depth.freq <- table(depth.cut)
depth.freq
depth.cut
[40,45) [45,50) [50,55) [55,60) [60,65) [65,70) [70,75) [75,80) 
      3       0      19    5092   47932     870      21       3 
cbind(depth.freq)
        depth.freq
[40,45)          3
[45,50)          0
[50,55)         19
[55,60)       5092
[60,65)      47932
[65,70)        870
[70,75)         21
[75,80)          3

Grouped Frequency of Table in Diamonds

# The maximum table value (95) sits exactly on the last break. With
# right = FALSE it would fall outside [90,95) and be dropped as NA, so
# include.lowest = TRUE closes the final class to [90,95] and keeps it.
breaks.table <- seq(40, 95, by = 5)
breaks.table
 [1] 40 45 50 55 60 65 70 75 80 85 90 95
table.cut <- cut(diamonds$table, breaks.table, right = FALSE,
                 include.lowest = TRUE)
table.freq <- table(table.cut)
table.freq
table.cut
[40,45) [45,50) [50,55) [55,60) [60,65) [65,70) [70,75) [75,80) [80,85) 
      2       2    3571   41278    8760     310      14       2       0 
[85,90) [90,95] 
      0       1 
cbind(table.freq)
        table.freq
[40,45)          2
[45,50)          2
[50,55)       3571
[55,60)      41278
[60,65)       8760
[65,70)        310
[70,75)         14
[75,80)          2
[80,85)          0
[85,90)          0
[90,95]          1

Plotting Histogram of Price

hist(diamonds$price)

Plotting Histogram of Carat

hist(diamonds$carat)

Plotting Histogram of Depth

hist(diamonds$depth)

Plotting Histogram of Table

hist(diamonds$table)