Fuel Efficiency and Induction Type

Is Turbocharging effective?

Ben Clingin. s3020403

Last updated: 16 October, 2021

Introduction

Problem Statement

Turbo Engine

Non-Turbo Engine

Data

Data Cont

Descriptive Statistics and Visualisation

Descriptive Statistics and Visualisation cont.

Summary Statistics by Induction Type
turbocharged MIN Q1 Median Q3 Max Mean SD N Missing
NO 15.00 20.84 23.00 27.31 36.62 24.08 4.41 2081 0
YES 13.61 21.40 24.01 26.49 38.58 24.07 3.66 1962 0

Descriptive Statistics and Visualisation cont

Summary Statistics by Induction Type outliers removed
turbocharged MIN Q1 Median Q3 Max Mean SD N Missing
NO 15.00 20.84 23 27.31 36.62 24.08 4.41 2081 0
YES 14.13 21.38 24 26.44 33.92 24.01 3.54 1946 0

Hypothesis Testing

Normality

turbompg$combined_MPG %>% qqPlot(dist="norm", main = "Turbocharged qqPlot", ylab = "MPG")

## [1] 1692 1855
nampg$combined_MPG %>% qqPlot(dist="norm", main = "Non-turbocharged qqPlot", ylab = "MPG")

## [1]  111 1567

Variance Homogeneity

\[\sigma^2_{1} \neq \sigma^2_{2}\]

lev <- leveneTest(combined_MPG ~ turbocharged, data = vehfinaloutlierremoved)
lev %>% kable(caption = "Levene Test Summary") %>%
  kable_classic(full_width = T) %>% 
  kable_styling(bootstrap_options = c("striped", "condensed"))
Levene Test Summary
Df F value Pr(>F)
Induction Type Group 1 79.5 7.23e-19
4025 NA NA

Hypthesis Testing Cont.

t.test(vehfinaloutlierremoved$combined_MPG ~ vehfinaloutlierremoved$turbocharged,
       data = vehfinaloutlierremoved,
       var.equal = FALSE,
       alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  vehfinaloutlierremoved$combined_MPG by vehfinaloutlierremoved$turbocharged
## t = 0.55292, df = 3936.5, p-value = 0.5803
## alternative hypothesis: true difference in means between group NO and group YES is not equal to 0
## 95 percent confidence interval:
##  -0.1767867  0.3156700
## sample estimates:
##  mean in group NO mean in group YES 
##          24.07947          24.01003

Discussion

Appendix 1 - Full R Code Outputs

## Descriptive Statistics R Codes

## Bar chart of cylinder count data
barlabels <- vehfinal %>% group_by(cylinders) %>% summarise(count = n()) %>% mutate(per = round(count / sum(count) * 100,2))

## Bar Chart with % total labels.
barlabels %>% ggplot(aes(cylinders, fill = cylinders))+
  geom_bar(aes(x=cylinders, y=count), stat="identity")+
  ggtitle("Cylinder Count (with total proportion)")+
  theme_bw()+
  theme(legend.position = c(.75,.8),legend.justification = c(.7,1),plot.title = element_text(hjust = 0.5))+
  scale_fill_brewer(palette = "Blues")+
  geom_text(aes(label=paste0(barlabels$per, "%"), y=barlabels$count), vjust=-.25)
## filter for 4 and 6 cylinders only
vehfinalfilt <- vehfinal %>% filter(cylinders == c("4","6"))

## Summary Stats for 4 and 6 cylinder vehicles grouped by induction type.
vehfinalfilt %>% group_by(turbocharged) %>% summarise(MIN = round(min(combined_MPG, na.rm = TRUE),2),
                                        Q1 = round(quantile(combined_MPG, probs = 0.25, na.rm = TRUE),2),
                                        Median = round(median(combined_MPG, na.rm = TRUE),2),
                                        Q3 = round(quantile(combined_MPG, probs = 0.75, na.rm = TRUE),2),
                                        Max = round(max(combined_MPG, na.rm = TRUE),2),
                                        Mean = round(mean(combined_MPG, na.rm = TRUE),2),
                                        SD = round(sd(combined_MPG, na.rm = TRUE),2),
                                        N = n(),
                                        Missing = sum(is.na(combined_MPG))) %>%
  kbl(caption = "Summary Statistics by Induction Type") %>% 
  kable_classic(full_width = T) %>% 
  kable_styling(bootstrap_options = c("striped", "condensed"))

## boxplot of data with outliers.
vehfinalfilt %>%ggplot(aes(x=combined_MPG, y = turbocharged, fill = turbocharged))+
           geom_boxplot()+
  ggtitle("Boxplot of Combined MPG by Induction Type")+
    theme_bw()+
  theme(plot.title = element_text(hjust = 0.5))+
  scale_fill_brewer(palette = "Blues")

Appendix 1 - Full R Code Outputs cont.

## outlier filtering
boxplotoutliers <- boxplot(vehfinalfilt$combined_MPG ~ vehfinalfilt$turbocharged, plot=FALSE)$out

vehfinaloutlierremoved <- vehfinalfilt %>% filter(!(combined_MPG %in% boxplotoutliers))

## Summary statistics with outliers removed
vehfinaloutlierremoved %>% group_by(turbocharged) %>% summarise(MIN = round(min(combined_MPG, na.rm = TRUE),2),
                                        Q1 = round(quantile(combined_MPG, probs = 0.25, na.rm = TRUE),2),
                                        Median = round(median(combined_MPG, na.rm = TRUE),2),
                                        Q3 = round(quantile(combined_MPG, probs = 0.75, na.rm = TRUE),2),
                                        Max = round(max(combined_MPG, na.rm = TRUE),2),
                                        Mean = round(mean(combined_MPG, na.rm = TRUE),2),
                                        SD = round(sd(combined_MPG, na.rm = TRUE),2),
                                        N = n(),
                                        Missing = sum(is.na(combined_MPG))) %>%
  kable(caption = "Summary Statistics by Induction Type outliers removed") %>% 
  kable_classic(full_width = T) %>% 
  kable_styling(bootstrap_options = c("striped", "condensed"))

## boxplot with outliers removed.
vehfinaloutlierremoved %>%ggplot(aes(x=combined_MPG, y = turbocharged, fill = turbocharged))+
           geom_boxplot()+
  ggtitle("Boxplot of Combined MPG by Induction Type outliers removed")+
    theme_bw()+
  theme(plot.title = element_text(hjust = 0.5))+
  scale_fill_brewer(palette = "Blues")
## Hypothesis Testing data cleaning
## Normality data filter
turbompg <- vehfinaloutlierremoved %>% filter(turbocharged == "YES") 
nampg <- vehfinaloutlierremoved %>% filter(turbocharged == "NO")

## Levene test data formatting
lev$`Pr(>F)` <- format(lev$`Pr(>F)`, digits = 3)
lev$`F value` <- format(lev$`F value`, digits = 3)
attributes(lev)$row.names <- c("Induction Type Group", "") 

References