# List the datasets shipped with every installed package
# (all.available = TRUE looks at all installed packages, not only attached ones)
data(
  package = .packages(all.available = TRUE)
)

Introduction

#Importing database in R
library(carData)
## Warning: package 'carData' was built under R version 4.3.2
# Working copy of the built-in mtcars data frame
mydata <- mtcars
# Drop cyl, disp, drat, vs, gear and carb (every other column is kept, in its
# original order), then show the first 6 rows of the reduced dataset
mydata1 <- mydata[, setdiff(names(mydata),
                            c("cyl", "disp", "drat", "vs", "gear", "carb"))]
head(mydata1, n = 6)
##                    mpg  hp    wt  qsec am
## Mazda RX4         21.0 110 2.620 16.46  1
## Mazda RX4 Wag     21.0 110 2.875 17.02  1
## Datsun 710        22.8  93 2.320 18.61  1
## Hornet 4 Drive    21.4 110 3.215 19.44  0
## Hornet Sportabout 18.7 175 3.440 17.02  0
## Valiant           18.1 105 3.460 20.22  0
# Compact overview of the column types and first values
str(mydata1)
## 'data.frame':    32 obs. of  5 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...

Description

# Recode the numeric indicator `am` as a labelled factor `amF`
# (0 -> "Automatic", 1 -> "Manual")
mydata1[["amF"]] <- factor(
  mydata1[["am"]],
  levels = c(0, 1),
  labels = c("Automatic", "Manual")
)
#Descriptive statistics by group
library(psych)

# Descriptive statistics of mpg, computed separately for each level of amF
result <- describeBy(x = mydata1[["mpg"]], group = mydata1[["amF"]])

print(result)
## 
##  Descriptive statistics by group 
## group: Automatic
##    vars  n  mean   sd median trimmed  mad  min  max range skew kurtosis   se
## X1    1 19 17.15 3.83   17.3   17.12 3.11 10.4 24.4    14 0.01     -0.8 0.88
## ------------------------------------------------------------ 
## group: Manual
##    vars  n  mean   sd median trimmed  mad min  max range skew kurtosis   se
## X1    1 13 24.39 6.17   22.8   24.38 6.67  15 33.9  18.9 0.05    -1.46 1.71

Interpretation

library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# Histogram of mpg in bins of width 2, with the bars of the two amF groups
# drawn side by side (dodged) and outlined
ggplot(data = mydata1, mapping = aes(x = mpg, fill = amF)) +
  geom_histogram(
    binwidth = 2,
    position = position_dodge(width = 2),
    colour = "Black"
  ) +
  labs(y = "Frequency", fill = "amF")

H0: Miles per gallon (efficiency) are normally distributed within both groups

H1: Miles per gallon (efficiency) are not normally distributed in at least one of the groups

#Checking the normality assumption with the Shapiro-Wilk test
library(rstatix)
## Warning: package 'rstatix' was built under R version 4.3.2
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
# Shapiro-Wilk test of mpg, run once per level of amF
shapiro_test(
  group_by(mydata1, amF),
  mpg
)
## # A tibble: 2 × 4
##   amF       variable statistic     p
##   <fct>     <chr>        <dbl> <dbl>
## 1 Automatic mpg          0.977 0.899
## 2 Manual    mpg          0.946 0.536
#Additionally, I will check the normality assumption visually with a Q-Q plot (ggqqplot), which is useful for small samples
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.3.2
# Q-Q plot of mpg, one panel per level of amF
ggqqplot(data = mydata1, x = "mpg", facet.by = "amF")

# Two-sided independent-samples t-test of mpg between the two amF groups.
# var.equal = FALSE requests the Welch version (no equal-variance assumption).
# `paired` is deliberately not passed: from R 4.4.0 the formula method of
# t.test() stops with "cannot use 'paired' in formula method" whenever that
# argument is supplied, even as FALSE. An unpaired test is the default.
t.test(mydata1$mpg ~ mydata1$amF,
       var.equal = FALSE,
       alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  mydata1$mpg by mydata1$amF
## t = -3.7671, df = 18.332, p-value = 0.001374
## alternative hypothesis: true difference in means between group Automatic and group Manual is not equal to 0
## 95 percent confidence interval:
##  -11.280194  -3.209684
## sample estimates:
## mean in group Automatic    mean in group Manual 
##                17.14737                24.39231

H0: μ1 - μ2 = 0

H1: μ1 - μ2 ≠ 0

library(effectsize)
## 
## Attaching package: 'effectsize'
## The following objects are masked from 'package:rstatix':
## 
##     cohens_d, eta_squared
## The following object is masked from 'package:psych':
## 
##     phi
# Cohen's d for the difference in mpg between the two amF groups.
# pooled_sd = FALSE uses the un-pooled SD, in line with var.equal = FALSE
# in the t-test. The result is stored so the estimate can be reused below.
d_mpg <- effectsize::cohens_d(mydata1$mpg ~ mydata1$amF,
                              pooled_sd = FALSE)
print(d_mpg)
## Cohen's d |         95% CI
## --------------------------
## -1.41     | [-2.26, -0.53]
## 
## - Estimated using un-pooled SD.
# Interpret the computed estimate itself rather than a rounded value retyped
# from the printed output, so the label cannot drift from the data.
interpret_cohens_d(d_mpg$Cohens_d, rules = "sawilowsky2009")
## [1] "very large"
## (Rules: sawilowsky2009)

Conclusion: