QUESTION 1

# Question 1: Using R code, identify the data type of each variable in mtcars, a dataset that ships with R in the datasets package.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(datasets)
# Compact per-column overview of mtcars: each variable with its type tag and leading values.
dplyr::glimpse(mtcars)
## Observations: 32
## Variables: 11
## $ mpg  <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19....
## $ cyl  <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, ...
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 1...
## $ hp   <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, ...
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.9...
## $ wt   <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3...
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 2...
## $ vs   <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ am   <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, ...
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, ...
# Print the complete mtcars data frame.
print(mtcars)
##                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
#character data:  Names of cars (stored as the data frame's row names, not as a column)
#numeric data:  mpg (Miles/(US) gallon), disp (Displacement (cu.in.)), hp (Gross horsepower), drat (Rear axle ratio), wt (Weight (1000 lbs)), qsec (1/4 mile time)
#integer-valued data (whole numbers, although glimpse() above reports every column as <dbl>, i.e. double):  cyl (Number of cylinders), vs (v or straight engine shape), gear (Number of forward gears), am (Transmission (0 = automatic, 1 = manual)), carb (Number of carburetors)
#source for dataset information:  https://www.rdocumentation.org/packages/datasets/versions/3.4.1/topics/mtcars 

DATA TYPES OF VARIABLES

Character Data: Names of cars (stored as the data frame's row names, not as a column)

Numeric Data: mpg (Miles/(US) gallon), disp (Displacement (cu.in.)), hp (Gross horsepower), drat (Rear axle ratio), wt (Weight (1000 lbs)), qsec (1/4 mile time)

Integer-Valued Data (whole numbers, although glimpse() reports every column as <dbl>, i.e. double): cyl (Number of cylinders), vs (v or straight engine shape), gear (Number of forward gears), am (Transmission (0 = automatic, 1 = manual)), carb (Number of carburetors)

QUESTION 2

CONTINUOUS: mpg, disp, drat, wt, qsec
DISCRETE: cyl, hp, vs, am, gear, carb

QUESTION 3

# Variable 1: mpg -- quartiles, mean and extremes
mpg_summary <- summary(mtcars[["mpg"]])
print(mpg_summary)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.40   15.42   19.20   20.09   22.80   33.90
# Variable 2: disp -- quartiles, mean and extremes
disp_summary <- summary(mtcars[["disp"]])
print(disp_summary)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    71.1   120.8   196.3   230.7   326.0   472.0
# Variable 3: vs -- quartiles, mean and extremes
vs_summary <- summary(mtcars[["vs"]])
print(vs_summary)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.4375  1.0000  1.0000

QUESTION 4

# Load the esoph data set, then summarise every column.
data("esoph")
print(summary(esoph))
##    agegp          alcgp         tobgp        ncases         ncontrols    
##  25-34:15   0-39g/day:23   0-9g/day:24   Min.   : 0.000   Min.   : 1.00  
##  35-44:15   40-79    :23   10-19   :24   1st Qu.: 0.000   1st Qu.: 3.00  
##  45-54:16   80-119   :21   20-29   :20   Median : 1.000   Median : 6.00  
##  55-64:16   120+     :21   30+     :20   Mean   : 2.273   Mean   :11.08  
##  65-74:15                                3rd Qu.: 4.000   3rd Qu.:14.00  
##  75+  :11                                Max.   :17.000   Max.   :60.00
# Print every row of esoph.
print(esoph)
##    agegp     alcgp    tobgp ncases ncontrols
## 1  25-34 0-39g/day 0-9g/day      0        40
## 2  25-34 0-39g/day    10-19      0        10
## 3  25-34 0-39g/day    20-29      0         6
## 4  25-34 0-39g/day      30+      0         5
## 5  25-34     40-79 0-9g/day      0        27
## 6  25-34     40-79    10-19      0         7
## 7  25-34     40-79    20-29      0         4
## 8  25-34     40-79      30+      0         7
## 9  25-34    80-119 0-9g/day      0         2
## 10 25-34    80-119    10-19      0         1
## 11 25-34    80-119      30+      0         2
## 12 25-34      120+ 0-9g/day      0         1
## 13 25-34      120+    10-19      1         1
## 14 25-34      120+    20-29      0         1
## 15 25-34      120+      30+      0         2
## 16 35-44 0-39g/day 0-9g/day      0        60
## 17 35-44 0-39g/day    10-19      1        14
## 18 35-44 0-39g/day    20-29      0         7
## 19 35-44 0-39g/day      30+      0         8
## 20 35-44     40-79 0-9g/day      0        35
## 21 35-44     40-79    10-19      3        23
## 22 35-44     40-79    20-29      1        14
## 23 35-44     40-79      30+      0         8
## 24 35-44    80-119 0-9g/day      0        11
## 25 35-44    80-119    10-19      0         6
## 26 35-44    80-119    20-29      0         2
## 27 35-44    80-119      30+      0         1
## 28 35-44      120+ 0-9g/day      2         3
## 29 35-44      120+    10-19      0         3
## 30 35-44      120+    20-29      2         4
## 31 45-54 0-39g/day 0-9g/day      1        46
## 32 45-54 0-39g/day    10-19      0        18
## 33 45-54 0-39g/day    20-29      0        10
## 34 45-54 0-39g/day      30+      0         4
## 35 45-54     40-79 0-9g/day      6        38
## 36 45-54     40-79    10-19      4        21
## 37 45-54     40-79    20-29      5        15
## 38 45-54     40-79      30+      5         7
## 39 45-54    80-119 0-9g/day      3        16
## 40 45-54    80-119    10-19      6        14
## 41 45-54    80-119    20-29      1         5
## 42 45-54    80-119      30+      2         4
## 43 45-54      120+ 0-9g/day      4         4
## 44 45-54      120+    10-19      3         4
## 45 45-54      120+    20-29      2         3
## 46 45-54      120+      30+      4         4
## 47 55-64 0-39g/day 0-9g/day      2        49
## 48 55-64 0-39g/day    10-19      3        22
## 49 55-64 0-39g/day    20-29      3        12
## 50 55-64 0-39g/day      30+      4         6
## 51 55-64     40-79 0-9g/day      9        40
## 52 55-64     40-79    10-19      6        21
## 53 55-64     40-79    20-29      4        17
## 54 55-64     40-79      30+      3         6
## 55 55-64    80-119 0-9g/day      9        18
## 56 55-64    80-119    10-19      8        15
## 57 55-64    80-119    20-29      3         6
## 58 55-64    80-119      30+      4         4
## 59 55-64      120+ 0-9g/day      5        10
## 60 55-64      120+    10-19      6         7
## 61 55-64      120+    20-29      2         3
## 62 55-64      120+      30+      5         6
## 63 65-74 0-39g/day 0-9g/day      5        48
## 64 65-74 0-39g/day    10-19      4        14
## 65 65-74 0-39g/day    20-29      2         7
## 66 65-74 0-39g/day      30+      0         2
## 67 65-74     40-79 0-9g/day     17        34
## 68 65-74     40-79    10-19      3        10
## 69 65-74     40-79    20-29      5         9
## 70 65-74    80-119 0-9g/day      6        13
## 71 65-74    80-119    10-19      4        12
## 72 65-74    80-119    20-29      2         3
## 73 65-74    80-119      30+      1         1
## 74 65-74      120+ 0-9g/day      3         4
## 75 65-74      120+    10-19      1         2
## 76 65-74      120+    20-29      1         1
## 77 65-74      120+      30+      1         1
## 78   75+ 0-39g/day 0-9g/day      1        18
## 79   75+ 0-39g/day    10-19      2         6
## 80   75+ 0-39g/day      30+      1         3
## 81   75+     40-79 0-9g/day      2         5
## 82   75+     40-79    10-19      1         3
## 83   75+     40-79    20-29      0         3
## 84   75+     40-79      30+      1         1
## 85   75+    80-119 0-9g/day      1         1
## 86   75+    80-119    10-19      1         1
## 87   75+      120+ 0-9g/day      2         2
## 88   75+      120+    10-19      1         1
#esoph
#agegp = age group - ordered factor
#alcgp = alcohol consumption - ordered factor
#tobgp = tobacco consumption - ordered factor

# Counts per age group
agegp_summary <- summary(esoph[["agegp"]])
print(agegp_summary)
## 25-34 35-44 45-54 55-64 65-74   75+ 
##    15    15    16    16    15    11
# Counts per alcohol-consumption group
alcgp_summary <- summary(esoph[["alcgp"]])
print(alcgp_summary)
## 0-39g/day     40-79    80-119      120+ 
##        23        23        21        21
# Counts per tobacco-consumption group
tobgp_summary <- summary(esoph[["tobgp"]])
print(tobgp_summary)
## 0-9g/day    10-19    20-29      30+ 
##       24       24       20       20

agegp = age group - ordered factor
alcgp = alcohol consumption - ordered factor
tobgp = tobacco consumption - ordered factor

QUESTION 5

# Absolute frequencies: number of rows in each level of the three grouping factors
freqagegp <- table(esoph[["agegp"]])
print(freqagegp)
## 
## 25-34 35-44 45-54 55-64 65-74   75+ 
##    15    15    16    16    15    11
freqalcgp <- table(esoph[["alcgp"]])
print(freqalcgp)
## 
## 0-39g/day     40-79    80-119      120+ 
##        23        23        21        21
freqtobgp <- table(esoph[["tobgp"]])
print(freqtobgp)
## 
## 0-9g/day    10-19    20-29      30+ 
##       24       24       20       20
# Relative frequencies: each count divided by the number of rows, rounded to two decimals
relfreqagegp <- freqagegp / nrow(esoph)
rnd_relfreqagegp <- round(relfreqagegp, 2)
print(rnd_relfreqagegp)
## 
## 25-34 35-44 45-54 55-64 65-74   75+ 
##  0.17  0.17  0.18  0.18  0.17  0.12
relfreqalcgp <- freqalcgp / nrow(esoph)
rnd_relfreqalcgp <- round(relfreqalcgp, 2)
print(rnd_relfreqalcgp)
## 
## 0-39g/day     40-79    80-119      120+ 
##      0.26      0.26      0.24      0.24
relfreqtobcgp <- freqtobgp / nrow(esoph)
rnd_relfreqtobcgp <- round(relfreqtobcgp, 2)
print(rnd_relfreqtobcgp)
## 
## 0-9g/day    10-19    20-29      30+ 
##     0.27     0.27     0.23     0.23

QUESTION 6

# Joint frequencies: age group (rows) by alcohol group (columns)
tablejf1 <- table(esoph[["agegp"]], esoph[["alcgp"]])
print(tablejf1)
##        
##         0-39g/day 40-79 80-119 120+
##   25-34         4     4      3    4
##   35-44         4     4      4    3
##   45-54         4     4      4    4
##   55-64         4     4      4    4
##   65-74         4     3      4    4
##   75+           3     4      2    2
# Joint frequencies: alcohol group (rows) by tobacco group (columns)
tablejf2 <- table(esoph[["alcgp"]], esoph[["tobgp"]])
print(tablejf2)
##            
##             0-9g/day 10-19 20-29 30+
##   0-39g/day        6     6     5   6
##   40-79            6     6     6   5
##   80-119           6     6     4   5
##   120+             6     6     5   4
# Same joint frequencies via xtabs(). Columns are named in the formula and
# looked up in `data`, so every name is spelled out in full: the earlier
# `esoph$alcg` only resolved to `alcgp` through `$` partial matching.
# Age group (rows) by alcohol group (columns):
xtabjf1 <- xtabs(~ agegp + alcgp, data = esoph)
xtabjf1
##        alcgp
## agegp   0-39g/day 40-79 80-119 120+
##   25-34         4     4      3    4
##   35-44         4     4      4    3
##   45-54         4     4      4    4
##   55-64         4     4      4    4
##   65-74         4     3      4    4
##   75+           3     4      2    2
# Alcohol group (rows) by tobacco group (columns), matching tablejf2
# (previously this repeated the age-by-alcohol table):
xtabjf2 <- xtabs(~ alcgp + tobgp, data = esoph)
xtabjf2
##            tobgp
## alcgp       0-9g/day 10-19 20-29 30+
##   0-39g/day        6     6     5   6
##   40-79            6     6     6   5
##   80-119           6     6     4   5
##   120+             6     6     5   4

QUESTION 7A

# Preview the diamonds tibble (first rows and column types).
print(diamonds)
## # A tibble: 53,940 x 10
##    carat       cut color clarity depth table price     x     y     z
##    <dbl>     <ord> <ord>   <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
##  1  0.23     Ideal     E     SI2  61.5    55   326  3.95  3.98  2.43
##  2  0.21   Premium     E     SI1  59.8    61   326  3.89  3.84  2.31
##  3  0.23      Good     E     VS1  56.9    65   327  4.05  4.07  2.31
##  4  0.29   Premium     I     VS2  62.4    58   334  4.20  4.23  2.63
##  5  0.31      Good     J     SI2  63.3    58   335  4.34  4.35  2.75
##  6  0.24 Very Good     J    VVS2  62.8    57   336  3.94  3.96  2.48
##  7  0.24 Very Good     I    VVS1  62.3    57   336  3.95  3.98  2.47
##  8  0.26 Very Good     H     SI1  61.9    55   337  4.07  4.11  2.53
##  9  0.22      Fair     E     VS2  65.1    61   337  3.87  3.78  2.49
## 10  0.23 Very Good     H     VS1  59.4    61   338  4.00  4.05  2.39
## # ... with 53,930 more rows
# Per-column summary of diamonds: quartiles for numeric columns, level counts for factors.
print(summary(diamonds))
##      carat               cut        color        clarity     
##  Min.   :0.2000   Fair     : 1610   D: 6775   SI1    :13065  
##  1st Qu.:0.4000   Good     : 4906   E: 9797   VS2    :12258  
##  Median :0.7000   Very Good:12082   F: 9542   SI2    : 9194  
##  Mean   :0.7979   Premium  :13791   G:11292   VS1    : 8171  
##  3rd Qu.:1.0400   Ideal    :21551   H: 8304   VVS2   : 5066  
##  Max.   :5.0100                     I: 5422   VVS1   : 3655  
##                                     J: 2808   (Other): 2531  
##      depth           table           price             x         
##  Min.   :43.00   Min.   :43.00   Min.   :  326   Min.   : 0.000  
##  1st Qu.:61.00   1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710  
##  Median :61.80   Median :57.00   Median : 2401   Median : 5.700  
##  Mean   :61.75   Mean   :57.46   Mean   : 3933   Mean   : 5.731  
##  3rd Qu.:62.50   3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540  
##  Max.   :79.00   Max.   :95.00   Max.   :18823   Max.   :10.740  
##                                                                  
##        y                z         
##  Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 4.720   1st Qu.: 2.910  
##  Median : 5.710   Median : 3.530  
##  Mean   : 5.735   Mean   : 3.539  
##  3rd Qu.: 6.540   3rd Qu.: 4.040  
##  Max.   :58.900   Max.   :31.800  
## 
# Minimum and maximum of four diamonds columns, each pulled out into its own vector first.
price <- diamonds[["price"]]
print(range(price))
## [1]   326 18823
carat <- diamonds[["carat"]]
print(range(carat))
## [1] 0.20 5.01
depth <- diamonds[["depth"]]
print(range(depth))
## [1] 43 79
# NOTE: this vector shares its name with base R's table() function.
table <- diamonds[["table"]]
print(range(table))
## [1] 43 95

QUESTION 7B

# Two-column tibble of price and depth.
# cbind() builds a numeric matrix, so both columns are stored as double.
# The columns are named V1 and V2 explicitly rather than relying on automatic
# naming of an unnamed matrix, and as_tibble() replaces the deprecated tbl_df().
grpfrq <- as_tibble(cbind(V1 = diamonds$price, V2 = diamonds$depth))
grpfrq
## # A tibble: 53,940 x 2
##       V1    V2
##    <dbl> <dbl>
##  1   326  61.5
##  2   326  59.8
##  3   327  56.9
##  4   334  62.4
##  5   335  63.3
##  6   336  62.8
##  7   336  62.3
##  8   337  61.9
##  9   337  65.1
## 10   338  59.4
## # ... with 53,930 more rows

QUESTION 7C

# Draw the price histogram and keep the object that hist() returns.
pricehisto <- hist(x = diamonds$price)

# Show the stored components (breaks, counts, density, mids, ...).
print(pricehisto)
## $breaks
##  [1]     0  1000  2000  3000  4000  5000  6000  7000  8000  9000 10000
## [12] 11000 12000 13000 14000 15000 16000 17000 18000 19000
## 
## $counts
##  [1] 14524  9683  6129  4225  4665  3163  2278  1668  1307  1076   934
## [12]   825   701   603   504   513   425   405   312
## 
## $density
##  [1] 2.692621e-04 1.795143e-04 1.136263e-04 7.832777e-05 8.648498e-05
##  [6] 5.863923e-05 4.223211e-05 3.092325e-05 2.423063e-05 1.994809e-05
## [11] 1.731554e-05 1.529477e-05 1.299592e-05 1.117909e-05 9.343715e-06
## [16] 9.510567e-06 7.879125e-06 7.508343e-06 5.784205e-06
## 
## $mids
##  [1]   500  1500  2500  3500  4500  5500  6500  7500  8500  9500 10500
## [12] 11500 12500 13500 14500 15500 16500 17500 18500
## 
## $xname
## [1] "diamonds$price"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# Draw the carat histogram and keep the object that hist() returns.
carathisto <- hist(x = diamonds$carat)

# Show the stored components (breaks, counts, density, mids, ...).
print(carathisto)
## $breaks
##  [1] 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5
## 
## $counts
##  [1] 18932 17506 12060  3553  1763    94    23     4     4     0     1
## 
## $density
##  [1] 7.019651e-01 6.490916e-01 4.471635e-01 1.317390e-01 6.536893e-02
##  [6] 3.485354e-03 8.527994e-04 1.483129e-04 1.483129e-04 0.000000e+00
## [11] 3.707824e-05
## 
## $mids
##  [1] 0.25 0.75 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75 5.25
## 
## $xname
## [1] "diamonds$carat"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# Draw the depth histogram and keep the object that hist() returns.
depthhisto <- hist(x = diamonds$depth)

# Show the stored components (breaks, counts, density, mids, ...).
print(depthhisto)
## $breaks
##  [1] 42 44 46 48 50 52 54 56 58 60 62 64 66 68 70 72 74 76 78 80
## 
## $counts
##  [1]     3     0     0     0     2    11    53   603  4953 25885 20560
## [12]  1504   278    65    17     3     0     0     3
## 
## $density
##  [1] 2.780868e-05 0.000000e+00 0.000000e+00 0.000000e+00 1.853912e-05
##  [6] 1.019651e-04 4.912866e-04 5.589544e-03 4.591212e-02 2.399425e-01
## [11] 1.905821e-01 1.394142e-02 2.576937e-03 6.025213e-04 1.575825e-04
## [16] 2.780868e-05 0.000000e+00 0.000000e+00 2.780868e-05
## 
## $mids
##  [1] 43 45 47 49 51 53 55 57 59 61 63 65 67 69 71 73 75 77 79
## 
## $xname
## [1] "diamonds$depth"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# Draw the table histogram and keep the object that hist() returns.
tablehisto <- hist(x = diamonds$table)

# Show the stored components (breaks, counts, density, mids, ...).
print(tablehisto)
## $breaks
##  [1] 40 45 50 55 60 65 70 75 80 85 90 95
## 
## $counts
##  [1]     2     4  9837 39251  4665   173     5     2     0     0     1
## 
## $density
##  [1] 7.415647e-06 1.483129e-05 3.647386e-02 1.455358e-01 1.729700e-02
##  [6] 6.414535e-04 1.853912e-05 7.415647e-06 0.000000e+00 0.000000e+00
## [11] 3.707824e-06
## 
## $mids
##  [1] 42.5 47.5 52.5 57.5 62.5 67.5 72.5 77.5 82.5 87.5 92.5
## 
## $xname
## [1] "diamonds$table"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"