# Hand Module 4 Code

# load packages
library(ggplot2)
library(dplyr)
# Overall mean MPG, computed up front. mtcars is usable before data(mtcars)
# because the datasets package makes it available by default.
mean_mpg <- mean(mtcars[["mpg"]])

# Task 1
# Bring the built-in mtcars data set into the workspace and preview its
# first six rows
data("mtcars")
head(mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Group the cars by cylinder count (group_data is not referenced again in the
# visible part of this script), then show summary statistics for every column
group_data <- group_by(mtcars, cyl)
summary(mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# calculating mean
# The assignment and the print are separate statements: a trailing `+` here
# would add the printed value to the mean and store double the intended value.
mean_mpg <- mean(mtcars$mpg)
print(mean_mpg)
## [1] 20.09062
# mean_mpg = 20.0906
# Standard error of the overall mean MPG: SE = SD / sqrt(sample size)
# sample size variable: number of non-missing mpg values
n <- length(na.omit(mtcars$mpg))
# standard deviation of mpg, ignoring missing values
sd_mpg <- sd(mtcars$mpg, na.rm = TRUE)
se_mpg <- sd_mpg / sqrt(n)
print(se_mpg)
## [1] 1.065424
# SE_mpg = 1.065
# summarize data in new dataframe: mean and standard error of mpg per
# cylinder class.
# The SE is computed inside each group (group SD / sqrt(group size)); using
# the overall sd_mpg and n here would give every group the same SE.
data_summary <- mtcars %>%
  group_by(cyl) %>%
  summarise(
    se_mpg = sd(mpg, na.rm = TRUE) / sqrt(sum(!is.na(mpg))),
    mean_mpg = mean(mpg, na.rm = TRUE)
  )
print(data_summary)
## # A tibble: 3 × 3
##     cyl se_mpg mean_mpg
##   <dbl>  <dbl>    <dbl>
## 1     4  1.36      26.7
## 2     6  0.549     19.7
## 3     8  0.684     15.1
# Task 2 creating a bar plot
# Bars show the mean MPG for each cylinder class; error bars span
# mean - SE to mean + SE using the columns of data_summary.
# Fill colours are mapped to the cylinder level by name, so each colour stays
# with its cylinder class regardless of row order; the legend is hidden
# because the x axis already labels the groups.
ggplot(data_summary, aes(x = factor(cyl), y = mean_mpg)) +
  geom_col(aes(fill = factor(cyl)), width = 0.7, show.legend = FALSE) +
  scale_fill_manual(
    values = c("4" = "skyblue", "6" = "orange", "8" = "green")
  ) +
  # linewidth replaces `size` for line geoms (ggplot2 >= 3.4.0)
  geom_errorbar(
    aes(ymin = mean_mpg - se_mpg, ymax = mean_mpg + se_mpg),
    width = 0.25,
    linewidth = 2
  ) +
  theme_classic() +
  labs(
    title = "Bar Plot of Mean MPG by Cylinders",
    x = "Number of Cylinders",
    y = "Mean MPG"
  )

# Note: I messed up the summary stats at first but fixed them.