#### question 1  identify the data type of each variable in mtcars
##the variables of mtcars are : mpg, disp, hp, drat,wt,qsec, vs, am,gear,carb
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(datasets)
data(mtcars)
mtcars
##                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
summary(mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
glimpse(mtcars)
## Observations: 32
## Variables: 11
## $ mpg  <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19....
## $ cyl  <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, ...
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 1...
## $ hp   <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, ...
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.9...
## $ wt   <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3...
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 2...
## $ vs   <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ am   <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, ...
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, ...
#############
### Question 2  What variables are discrete or continuous

#  continuous - mpg, disp, hp, drat, wt and qsec 
#  discrete -    cyl, am, vs, gear and carb 
class(mtcars)
## [1] "data.frame"
##############

#### Question 3three variables (your selection) from mtcars using the R
##function, summary.

wt_summary <- (mtcars$wt)
wt_summary
##  [1] 2.620 2.875 2.320 3.215 3.440 3.460 3.570 3.190 3.150 3.440 3.440
## [12] 4.070 3.730 3.780 5.250 5.424 5.345 2.200 1.615 1.835 2.465 3.520
## [23] 3.435 3.840 3.845 1.935 2.140 1.513 3.170 2.770 3.570 2.780
mpg_summary <- (mtcars$mpg)
mpg_summary
##  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
## [29] 15.8 19.7 15.0 21.4
drat_summary <- (mtcars$drat)
drat_summary
##  [1] 3.90 3.90 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 3.92 3.07 3.07 3.07
## [15] 2.93 3.00 3.23 4.08 4.93 4.22 3.70 2.76 3.15 3.73 3.08 4.08 4.43 3.77
## [29] 4.22 3.62 3.54 4.11
########################
### Question 4 data type of the following three variables in esoph,agegp, alcgp, and tobgp.

library(datasets) 
data(esoph)
summary(esoph)
##    agegp          alcgp         tobgp        ncases         ncontrols    
##  25-34:15   0-39g/day:23   0-9g/day:24   Min.   : 0.000   Min.   : 1.00  
##  35-44:15   40-79    :23   10-19   :24   1st Qu.: 0.000   1st Qu.: 3.00  
##  45-54:16   80-119   :21   20-29   :20   Median : 1.000   Median : 6.00  
##  55-64:16   120+     :21   30+     :20   Mean   : 2.273   Mean   :11.08  
##  65-74:15                                3rd Qu.: 4.000   3rd Qu.:14.00  
##  75+  :11                                Max.   :17.000   Max.   :60.00
glimpse(esoph)
## Observations: 88
## Variables: 5
## $ agegp     <ord> 25-34, 25-34, 25-34, 25-34, 25-34, 25-34, 25-34, 25-...
## $ alcgp     <ord> 0-39g/day, 0-39g/day, 0-39g/day, 0-39g/day, 40-79, 4...
## $ tobgp     <ord> 0-9g/day, 10-19, 20-29, 30+, 0-9g/day, 10-19, 20-29,...
## $ ncases    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0...
## $ ncontrols <dbl> 40, 10, 6, 5, 27, 7, 4, 7, 2, 1, 2, 1, 1, 1, 2, 60, ...
##agegp = age group, alcgp = alcohol group, tobgp = tobacco group

#######################
### Question 5 frequency and relative frequency distributions of agegp, alcgp, and
#tobgp in esoph.

esoph_df <-tbl_df(esoph)
eso.freq <- table(esoph$agegp)
eso.freq
## 
## 25-34 35-44 45-54 55-64 65-74   75+ 
##    15    15    16    16    15    11
eso.freq <- table(esoph$alcgp)
eso.freq
## 
## 0-39g/day     40-79    80-119      120+ 
##        23        23        21        21
eso.freq <- table(esoph$tobgp)
eso.freq
## 
## 0-9g/day    10-19    20-29      30+ 
##       24       24       20       20
eso.relfreq <- eso.freq / nrow(esoph)
eso.relfreq
## 
##  0-9g/day     10-19     20-29       30+ 
## 0.2727273 0.2727273 0.2272727 0.2272727
round.eso.relfreq <- round (eso.relfreq, digits = 2)
round.eso.relfreq
## 
## 0-9g/day    10-19    20-29      30+ 
##     0.27     0.27     0.23     0.23
cbind(round.eso.relfreq)
##          round.eso.relfreq
## 0-9g/day              0.27
## 10-19                 0.27
## 20-29                 0.23
## 30+                   0.23
######################
### Quetion 6 joint frequency of agegp and alcgp as well as alcgp and tobgp in
# esoph

library(dplyr)
library(ggplot2)
crosstab <- table(esoph$agegp, esoph$tobgp)
crosstab
##        
##         0-9g/day 10-19 20-29 30+
##   25-34        4     4     3   4
##   35-44        4     4     4   3
##   45-54        4     4     4   4
##   55-64        4     4     4   4
##   65-74        4     4     4   3
##   75+          4     4     1   2
crosstab2 <- table(esoph$agegp, esoph$alcgp)
crosstab2
##        
##         0-39g/day 40-79 80-119 120+
##   25-34         4     4      3    4
##   35-44         4     4      4    3
##   45-54         4     4      4    4
##   55-64         4     4      4    4
##   65-74         4     3      4    4
##   75+           3     4      2    2
ftable(crosstab2)
##        0-39g/day 40-79 80-119 120+
##                                   
## 25-34          4     4      3    4
## 35-44          4     4      4    3
## 45-54          4     4      4    4
## 55-64          4     4      4    4
## 65-74          4     3      4    4
## 75+            3     4      2    2
table(esoph$agegp, esoph$alcgp)
##        
##         0-39g/day 40-79 80-119 120+
##   25-34         4     4      3    4
##   35-44         4     4      4    3
##   45-54         4     4      4    4
##   55-64         4     4      4    4
##   65-74         4     3      4    4
##   75+           3     4      2    2
#######################
###Quetion 7 Load the dataset, diamonds, a dataset built into R as long as you have the package,

library(ggplot2)
library(datasets)
data("diamonds")
diamonds
## # A tibble: 53,940 x 10
##    carat       cut color clarity depth table price     x     y     z
##    <dbl>     <ord> <ord>   <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
##  1  0.23     Ideal     E     SI2  61.5    55   326  3.95  3.98  2.43
##  2  0.21   Premium     E     SI1  59.8    61   326  3.89  3.84  2.31
##  3  0.23      Good     E     VS1  56.9    65   327  4.05  4.07  2.31
##  4  0.29   Premium     I     VS2  62.4    58   334  4.20  4.23  2.63
##  5  0.31      Good     J     SI2  63.3    58   335  4.34  4.35  2.75
##  6  0.24 Very Good     J    VVS2  62.8    57   336  3.94  3.96  2.48
##  7  0.24 Very Good     I    VVS1  62.3    57   336  3.95  3.98  2.47
##  8  0.26 Very Good     H     SI1  61.9    55   337  4.07  4.11  2.53
##  9  0.22      Fair     E     VS2  65.1    61   337  3.87  3.78  2.49
## 10  0.23 Very Good     H     VS1  59.4    61   338  4.00  4.05  2.39
## # ... with 53,930 more rows
######################
#.7A Using R code, display the range of the following variables: price, carat,depth, and table.


diaprice <- diamonds$price
range(diaprice)
## [1]   326 18823
diacarat <- diamonds$carat
range(diacarat)
## [1] 0.20 5.01
diadepth <- diamonds$depth
range(diacarat)
## [1] 0.20 5.01
diatab <- diamonds$table
range(diatab)
## [1] 43 95
#########################
#7B Report the grouped frequency of any two of the variables identified in 7.a.
# variables used depth and table


library(dplyr)
library(ggplot2)

depth <- diamonds$depth
range(depth)
## [1] 43 79
depth_break <- seq(0, 70, by= 30)
depth_break
## [1]  0 30 60
depth.cut <- cut(depth, depth_break, right=FALSE)
depth.freq <- table(depth.cut)
depth.freq
## depth.cut
##  [0,30) [30,60) 
##       0    5114
cbind(depth.freq)
##         depth.freq
## [0,30)           0
## [30,60)       5114
library(ggplot2)
library(dplyr)

carat <- diamonds$carat
range(carat)
## [1] 0.20 5.01
carat_break <- seq(0, 5.1, by= 1.0)
carat_break
## [1] 0 1 2 3 4 5
carat.cut <- cut(carat, carat_break, right=FALSE)
carat.freq <- table(carat.cut)
carat.freq
## carat.cut
## [0,1) [1,2) [2,3) [3,4) [4,5) 
## 34880 16906  2114    34     5
cbind(carat.freq)
##       carat.freq
## [0,1)      34880
## [1,2)      16906
## [2,3)       2114
## [3,4)         34
## [4,5)          5
#######################
######Quetion 7C  histograms for vaiables in 7A

hist(diaprice)

hist(diatab)

hist(diacarat)

hist(diadepth)