data <- mtcars
# write.csv(data, 'data/data.csv')
# Generate practical dataset
# Take a working copy of the built-in `mtcars` data frame.
data <- datasets::mtcars
# Optional CSV export, left disabled:
# write.csv(data, 'data/data.csv')
library(psych)
## Warning: 程辑包'psych'是用R版本4.1.3 来建造的
library(tidyverse)
## Warning: 程辑包'tidyverse'是用R版本4.1.3 来建造的
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## Warning: 程辑包'ggplot2'是用R版本4.1.3 来建造的
## Warning: 程辑包'tibble'是用R版本4.1.3 来建造的
## Warning: 程辑包'tidyr'是用R版本4.1.3 来建造的
## Warning: 程辑包'readr'是用R版本4.1.3 来建造的
## Warning: 程辑包'purrr'是用R版本4.1.3 来建造的
## Warning: 程辑包'dplyr'是用R版本4.1.3 来建造的
## Warning: 程辑包'stringr'是用R版本4.1.3 来建造的
## Warning: 程辑包'forcats'是用R版本4.1.3 来建造的
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::%+%() masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sm)
## Warning: 程辑包'sm'是用R版本4.1.3 来建造的
## Package 'sm', version 2.2-5.7: type help(sm) for summary information
To carry out the analysis, we used the psych, tidyverse, and sm packages.
library(psych)
library(tidyverse)
library(sm)
# Loading Data
The data I used is the `mtcars` dataset, which contains information about cars' specifications and performance.
# Load the CSV copy of the dataset; the path is resolved relative to the
# current working directory.
data <- read.csv(file = '../data/data.csv')
data <- read.csv('../data/data.csv')
As a starting point, I computed descriptive statistics.
# Per-column overview of `data`: min, quartiles, mean and max for the numeric
# columns, and length/class/mode for the character column.
summary(data)
## X mpg cyl disp
## Length:32 Min. :10.40 Min. :4.000 Min. : 71.1
## Class :character 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8
## Mode :character Median :19.20 Median :6.000 Median :196.3
## Mean :20.09 Mean :6.188 Mean :230.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0
## Max. :33.90 Max. :8.000 Max. :472.0
## hp drat wt qsec
## Min. : 52.0 Min. :2.760 Min. :1.513 Min. :14.50
## 1st Qu.: 96.5 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89
## Median :123.0 Median :3.695 Median :3.325 Median :17.71
## Mean :146.7 Mean :3.597 Mean :3.217 Mean :17.85
## 3rd Qu.:180.0 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90
## Max. :335.0 Max. :4.930 Max. :5.424 Max. :22.90
## vs am gear carb
## Min. :0.0000 Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4375 Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :1.0000 Max. :5.000 Max. :8.000
# Extended descriptive statistics for every column (n, mean, sd, median,
# trimmed mean, mad, min, max, range, skew, kurtosis, se), using `describe()`
# from the psych package.
describe(data)
## vars n mean sd median trimmed mad min max range skew
## X* 1 32 16.50 9.38 16.50 16.50 11.86 1.00 32.00 31.00 0.00
## mpg 2 32 20.09 6.03 19.20 19.70 5.41 10.40 33.90 23.50 0.61
## cyl 3 32 6.19 1.79 6.00 6.23 2.97 4.00 8.00 4.00 -0.17
## disp 4 32 230.72 123.94 196.30 222.52 140.48 71.10 472.00 400.90 0.38
## hp 5 32 146.69 68.56 123.00 141.19 77.10 52.00 335.00 283.00 0.73
## drat 6 32 3.60 0.53 3.70 3.58 0.70 2.76 4.93 2.17 0.27
## wt 7 32 3.22 0.98 3.33 3.15 0.77 1.51 5.42 3.91 0.42
## qsec 8 32 17.85 1.79 17.71 17.83 1.42 14.50 22.90 8.40 0.37
## vs 9 32 0.44 0.50 0.00 0.42 0.00 0.00 1.00 1.00 0.24
## am 10 32 0.41 0.50 0.00 0.38 0.00 0.00 1.00 1.00 0.36
## gear 11 32 3.69 0.74 4.00 3.62 1.48 3.00 5.00 2.00 0.53
## carb 12 32 2.81 1.62 2.00 2.65 1.48 1.00 8.00 7.00 1.05
## kurtosis se
## X* -1.31 1.66
## mpg -0.37 1.07
## cyl -1.76 0.32
## disp -1.21 21.91
## hp -0.14 12.12
## drat -0.71 0.09
## wt -0.02 0.17
## qsec 0.34 0.32
## vs -2.00 0.09
## am -1.92 0.09
## gear -1.07 0.13
## carb 1.26 0.29
summary(data)
describe(data)
While reviewing the data, I wondered about the relationship between mpg (miles per gallon, a measure of car performance) and vs (engine shape).
# Tabulate how many cars fall in each level of the engine-shape flag `vs`.
table(data$vs)
##
## 0 1
## 18 14
# Partition the cars on `vs`: zero in one subset, non-zero in the other.
data_a <- data %>% filter(vs == 0)
data_b <- data %>% filter(vs != 0)
# Mean mpg of each subset.
mean(data_a[["mpg"]])
## [1] 16.61667
mean(data_b[["mpg"]])
## [1] 24.55714
table(data$vs)
data_a <- filter(data, vs == 0)
data_b <- filter(data, vs != 0)
mean(data_a$mpg)
mean(data_b$mpg)
The average mpg of cars with a V-shaped engine was `r mean(data_a$mpg)`, and the average mpg of cars with a straight engine was `r mean(data_b$mpg)`.
## Visualization
# Histogram of fuel economy (mpg) across all cars.
hist(data$mpg)
# NOTE(review): same call repeated; it draws the identical histogram again.
hist(data$mpg)
# Kernel density estimate of mpg.
plot(density(data$mpg))
# NOTE(review): same call repeated; it draws the identical density plot again.
plot(density(data$mpg))
# Compare the mpg densities of the `vs` groups; with model = 'equal' the call
# also reports a test of equal densities (result printed below).
sm.density.compare(data$mpg, data$vs, model='equal')
## Test of equal densities: p-value = 0
# Boxplots of mpg, one per `vs` group.
boxplot(mpg ~ vs, data = data)
boxplot(mpg ~ vs, data = data)
# Two-sample t-test of mpg between the two `vs` groups; the printed result
# below identifies it as the Welch test. The fitted test object is kept in `m1`.
m1 <- t.test(mpg ~ vs, data = data)
# Show the test result.
m1
##
## Welch Two Sample t-test
##
## data: mpg by vs
## t = -4.6671, df = 22.716, p-value = 0.0001098
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -11.462508 -4.418445
## sample estimates:
## mean in group 0 mean in group 1
## 16.61667 24.55714
m1 <- t.test(mpg ~ vs, data = data)
m1
# Simple linear regression of mpg on the `vs` indicator, followed by its
# coefficient table and fit statistics.
m2 <- lm(formula = mpg ~ vs, data = data)
summary(m2)
##
## Call:
## lm(formula = mpg ~ vs, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.757 -3.082 -1.267 2.828 9.383
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.617 1.080 15.390 8.85e-16 ***
## vs 7.940 1.632 4.864 3.42e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.581 on 30 degrees of freedom
## Multiple R-squared: 0.4409, Adjusted R-squared: 0.4223
## F-statistic: 23.66 on 1 and 30 DF, p-value: 3.416e-05
m2 <- lm(mpg ~ vs, data = data)
summary(m2)