# Generate the practice dataset (run once, then comment these lines out)
#data <- mtcars
#write.csv(data, 'data/data.csv')
library(psych)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(sm)
## Package 'sm', version 2.2-5.7: type help(sm) for summary information
# Read the practice dataset from disk, then print a per-column summary.
data <- read.csv(file = "data/data.csv")
summary(object = data)
## X mpg cyl disp
## Length:32 Min. :10.40 Min. :4.000 Min. : 71.1
## Class :character 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8
## Mode :character Median :19.20 Median :6.000 Median :196.3
## Mean :20.09 Mean :6.188 Mean :230.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0
## Max. :33.90 Max. :8.000 Max. :472.0
## hp drat wt qsec
## Min. : 52.0 Min. :2.760 Min. :1.513 Min. :14.50
## 1st Qu.: 96.5 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89
## Median :123.0 Median :3.695 Median :3.325 Median :17.71
## Mean :146.7 Mean :3.597 Mean :3.217 Mean :17.85
## 3rd Qu.:180.0 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90
## Max. :335.0 Max. :4.930 Max. :5.424 Max. :22.90
## vs am gear carb
## Min. :0.0000 Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4375 Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :1.0000 Max. :5.000 Max. :8.000
# Descriptive statistics for every column (psych package). In the printed
# table the character column X appears as `X*`; its statistics are computed on
# converted codes and are not meaningful.
psych::describe(data)
## vars n mean sd median trimmed mad min max range skew
## X* 1 32 16.50 9.38 16.50 16.50 11.86 1.00 32.00 31.00 0.00
## mpg 2 32 20.09 6.03 19.20 19.70 5.41 10.40 33.90 23.50 0.61
## cyl 3 32 6.19 1.79 6.00 6.23 2.97 4.00 8.00 4.00 -0.17
## disp 4 32 230.72 123.94 196.30 222.52 140.48 71.10 472.00 400.90 0.38
## hp 5 32 146.69 68.56 123.00 141.19 77.10 52.00 335.00 283.00 0.73
## drat 6 32 3.60 0.53 3.70 3.58 0.70 2.76 4.93 2.17 0.27
## wt 7 32 3.22 0.98 3.33 3.15 0.77 1.51 5.42 3.91 0.42
## qsec 8 32 17.85 1.79 17.71 17.83 1.42 14.50 22.90 8.40 0.37
## vs 9 32 0.44 0.50 0.00 0.42 0.00 0.00 1.00 1.00 0.24
## am 10 32 0.41 0.50 0.00 0.38 0.00 0.00 1.00 1.00 0.36
## gear 11 32 3.69 0.74 4.00 3.62 1.48 3.00 5.00 2.00 0.53
## carb 12 32 2.81 1.62 2.00 2.65 1.48 1.00 8.00 7.00 1.05
## kurtosis se
## X* -1.31 1.66
## mpg -0.37 1.07
## cyl -1.76 0.32
## disp -1.21 21.91
## hp -0.14 12.12
## drat -0.71 0.09
## wt -0.02 0.17
## qsec 0.34 0.32
## vs -2.00 0.09
## am -1.92 0.09
## gear -1.07 0.13
## carb 1.26 0.29
# Split the rows by the 0/1 `am` indicator:
# data_a holds the am == 0 rows, data_b holds the am == 1 rows.
data_a <- data %>% dplyr::filter(am == 0)
data_b <- data %>% dplyr::filter(am == 1)
# Distribution of qsec over the whole sample: histogram and kernel density.
hist(data$qsec)
plot(density(data$qsec))

# Overlay the qsec density of each `am` group. With model = "equal" the sm
# package also runs a permutation test of equal densities, which draws random
# permutations; fix the seed so the reported p-value is reproducible.
set.seed(1)
sm.density.compare(data$qsec, data$am, model = "equal")
## Test of equal densities: p-value = 0.46
#am.f <- factor(data$am, levels = c(0,1),
# labels = c('V shape', 'S shape'))
#colfill<-c(2:(2+length(levels(am.f))))
#legend(locator(1), levels(am.f), fill=colfill)
# Compare qsec between the two `am` groups with box plots.
# Base-graphics version:
boxplot(qsec ~ am, data = data)

# ggplot2 version with the raw observations overlaid.
# - `am` is mapped as a factor so the x axis is discrete (one tick per group)
#   rather than a continuous 0..1 scale.
# - Jitter is horizontal only (height = 0) so the plotted qsec values are not
#   displaced from their true positions.
# - Box-plot outlier markers are hidden because geom_jitter() already draws
#   every observation; otherwise outliers would appear twice.
data %>%
  ggplot(aes(x = factor(am), y = qsec)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(color = "black", size = 0.4, alpha = 0.9, height = 0) +
  labs(x = "am")
# Two-sample Welch t-test (unequal variances, two-sided): qsec is the numeric
# outcome and the 0/1 column `am` defines the two groups.
t.test(qsec ~ am, data = data, alternative = "two.sided", var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: qsec by am
## t = 1.2878, df = 25.534, p-value = 0.2093
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -0.4918522 2.1381679
## sample estimates:
## mean in group 0 mean in group 1
## 18.18316 17.36000
# Same group comparison expressed as a linear regression of qsec on am:
# the intercept is the mean of the am == 0 group and the `am` coefficient is
# the difference between the group means.
m1 <- lm(formula = qsec ~ am, data = data)
summary(m1)
##
## Call:
## lm(formula = qsec ~ am, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8600 -0.9583 -0.3516 1.2517 4.7168
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.1832 0.4056 44.833 <2e-16 ***
## am -0.8232 0.6363 -1.294 0.206
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.768 on 30 degrees of freedom
## Multiple R-squared: 0.05284, Adjusted R-squared: 0.02126
## F-statistic: 1.674 on 1 and 30 DF, p-value: 0.2057