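mpg, disp, hp, drat, wt, and qsec are all numeric. cyl, vs, am, gear, and carb contain only whole-number values, but they are also stored as numeric rather than integer (see class(mtcars$cyl) below). The car names are not one of the 11 columns listed by names(mtcars); they are character labels attached to the rows.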
# Column names of mtcars: 11 columns, none of which holds the car names
names(mtcars)
[1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
[11] "carb"
# Storage class of each of the first five columns
class(mtcars$mpg)
[1] "numeric"
# cyl reports "numeric" as well, i.e. it is not stored as an integer vector
class(mtcars$cyl)
[1] "numeric"
class(mtcars$disp)
[1] "numeric"
class(mtcars$hp)
[1] "numeric"
class(mtcars$drat)
[1] "numeric"
mpg, disp, hp, drat, wt, and qsec are all continuous variables. cyl, vs, am, gear, and carb are all discrete variables.
# Summary statistics (min, quartiles, median, mean, max) for mpg
mpg_summary <- summary(mtcars[["mpg"]])
print(mpg_summary)
Min. 1st Qu. Median Mean 3rd Qu. Max.
10.40 15.43 19.20 20.09 22.80 33.90
# Summary statistics (min, quartiles, median, mean, max) for disp
displacement_summary <- summary(mtcars[["disp"]])
print(displacement_summary)
Min. 1st Qu. Median Mean 3rd Qu. Max.
71.1 120.8 196.3 230.7 326.0 472.0
# Summary statistics (min, quartiles, median, mean, max) for wt
weight_summary <- summary(mtcars[["wt"]])
print(weight_summary)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.513 2.581 3.325 3.217 3.610 5.424
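agegp, alcgp, and tobgp are all ordinal categorical variables: each is stored as an ordered factor whose levels are ranges (for example "25-34"), not as a continuous measurement. ncases and ncontrols are both discrete counts.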
# Storage class of each esoph column.
# class() reports the three grouping variables as ordered factors ...
class(esoph$agegp)
[1] "ordered" "factor"
class(esoph$alcgp)
[1] "ordered" "factor"
class(esoph$tobgp)
[1] "ordered" "factor"
# ... and the two count columns as numeric.
class(esoph$ncases)
[1] "numeric"
class(esoph$ncontrols)
[1] "numeric"
# Absolute and relative frequency distribution of agegp
frequency_agegp <- table(esoph$agegp)
# prop.table() divides each count by the table total, which equals
# nrow(esoph) because agegp has no missing values
freq_dist_agegp <- prop.table(frequency_agegp)
rnd_agegp <- round(freq_dist_agegp, digits = 2)
# Counts and rounded proportions side by side
cbind(frequency_agegp, rnd_agegp)
frequency_agegp rnd_agegp
25-34 15 0.17
35-44 15 0.17
45-54 16 0.18
55-64 16 0.18
65-74 15 0.17
75+ 11 0.12
# Absolute and relative frequency distribution of alcgp
frequency_alcgp <- table(esoph$alcgp)
# prop.table() divides each count by the table total, which equals
# nrow(esoph) because alcgp has no missing values
freq_dist_alcgp <- prop.table(frequency_alcgp)
rnd_alcgp <- round(freq_dist_alcgp, digits = 2)
# Counts and rounded proportions side by side
cbind(frequency_alcgp, rnd_alcgp)
frequency_alcgp rnd_alcgp
0-39g/day 23 0.26
40-79 23 0.26
80-119 21 0.24
120+ 21 0.24
# Absolute and relative frequency distribution of tobgp
frequency_tobgp <- table(esoph$tobgp)
# prop.table() divides each count by the table total, which equals
# nrow(esoph) because tobgp has no missing values
freq_dist_tobgp <- prop.table(frequency_tobgp)
rnd_tobgp <- round(freq_dist_tobgp, digits = 2)
# Counts and rounded proportions side by side
cbind(frequency_tobgp, rnd_tobgp)
frequency_tobgp rnd_tobgp
0-9g/day 24 0.27
10-19 24 0.27
20-29 20 0.23
30+ 20 0.23
# Two-way contingency table: agegp (rows) by alcgp (columns)
table(esoph[["agegp"]], esoph[["alcgp"]])
0-39g/day 40-79 80-119 120+
25-34 4 4 3 4
35-44 4 4 4 3
45-54 4 4 4 4
55-64 4 4 4 4
65-74 4 3 4 4
75+ 3 4 2 2
# Two-way contingency table: alcgp (rows) by tobgp (columns)
table(esoph[["alcgp"]], esoph[["tobgp"]])
0-9g/day 10-19 20-29 30+
0-39g/day 6 6 5 6
40-79 6 6 6 5
80-119 6 6 4 5
120+ 6 6 5 4
# Smallest and largest diamond price (diamonds ships with ggplot2)
library(ggplot2)
range(diamonds[["price"]])
[1] 326 18823
# Smallest and largest diamond carat
range(diamonds[["carat"]])
[1] 0.20 5.01
# Smallest and largest diamond depth
range(diamonds[["depth"]])
[1] 43 79
# Smallest and largest diamond table
range(diamonds[["table"]])
[1] 43 95
# Grouped frequency of diamond price and diamond carat
# NOTE(review): the call below only places price and carat side by side in a
# two-column tibble (V1 = price, V2 = carat); nothing is binned or counted.
# A true grouped frequency would need cut() + table() on each variable --
# confirm the intended output before changing what is printed.
library(dplyr)
# as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
# Naming the columns inside cbind() keeps the V1/V2 headers without relying
# on tibble's name repair for unnamed matrix columns.
as_tibble(cbind(V1 = diamonds$price, V2 = diamonds$carat))
# A tibble: 53,940 x 2
V1 V2
<dbl> <dbl>
1 326 0.23
2 326 0.21
3 327 0.23
4 334 0.29
5 335 0.31
6 336 0.24
7 336 0.24
8 337 0.26
9 337 0.22
10 338 0.23
# ... with 53,930 more rows