#Eva Collins
#MAT 499 SP 25
#Notes from 04 e on canvas:
# t.test() 2-Sample test mtcars$mpg (am = 0 or 1)
# Load the built-in mtcars data frame and preview its first six rows.
data("mtcars")
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Split mpg into two groups by the am column (0 -> automatic, 1 -> manual).
automatic_mpg <- with(mtcars, mpg[am == 0])
manual_mpg <- with(mtcars, mpg[am == 1])
# Preview the first values of each group.
head(automatic_mpg)
## [1] 21.4 18.7 18.1 14.3 24.4 22.8
head(manual_mpg)
## [1] 21.0 21.0 22.8 32.4 30.4 33.9
# Two-sample t-test. The named arguments restate t.test()'s defaults
# (two-sided alternative, Welch test without pooled variance, 95% CI).
t.test(
  x = automatic_mpg,
  y = manual_mpg,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: automatic_mpg and manual_mpg
## t = -3.7671, df = 18.332, p-value = 0.001374
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11.280194 -3.209684
## sample estimates:
## mean of x mean of y
## 17.14737 24.39231
#p-value: This is smaller than 0.05. Hence we reject the null hypothesis.
#That is, with p-value of 0.0014, which is smaller than the significance
#level of 0.05, we reject the null hypothesis.
#This suggests that there is sufficient statistical evidence to conclude
#the means of the two groups (automatic transmission and manual
#transmission) are different.
#Note: The confidence interval (-11.28, -3.21) is for the
#difference in means. It does not contain 0 (which would mean no difference).
#This is in accordance with the p-value, indicating a significant
#difference at the 5% level.
#HW02
#Consider the dataset mtcars. It has a feature/variable called vs (for engine configuration).
#vs takes two possible values (0 or 1).
#vs = 0 indicates a V-shaped engine
#vs = 1 indicates a straight engine
#To get some ideas: V vs straight
#FYI. The cylinder configuration affects power, torque, vehicle balance, etc. (hence overall performance).
#We want to use t.test() to explore the differences of cars based on their engine configurations.
#Q1 (2.5 pt). Use a 2-sample test to determine if there is a
#significant difference between the mpg of the two groups based
#on their engine configurations (vs = 0 and vs = 1).
#Use a 5% level of significance.
# Q1: compare mpg between engine configurations (vs: 0 = V-shaped, 1 = straight).
data("mtcars")
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# mpg values for each engine configuration.
v_shaped_engine <- mtcars[mtcars$vs == 0, "mpg"]
straight_engine <- mtcars[mtcars$vs == 1, "mpg"]
head(v_shaped_engine)
## [1] 21.0 21.0 18.7 14.3 16.4 17.3
head(straight_engine)
## [1] 22.8 21.4 18.1 24.4 22.8 19.2
# Two-sample t-test at the 5% level. The named arguments restate t.test()'s
# defaults (two-sided alternative, Welch test, 95% confidence interval).
t.test(
  x = v_shaped_engine,
  y = straight_engine,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: v_shaped_engine and straight_engine
## t = -4.6671, df = 22.716, p-value = 0.0001098
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11.462508 -4.418445
## sample estimates:
## mean of x mean of y
## 16.61667 24.55714
# Bonus: Shapiro-Wilk normality check for each group.
shapiro.test(x = mtcars$mpg[mtcars$vs == 0])
##
## Shapiro-Wilk normality test
##
## data: mtcars$mpg[mtcars$vs == 0]
## W = 0.95151, p-value = 0.4491
shapiro.test(x = mtcars$mpg[mtcars$vs == 1])
##
## Shapiro-Wilk normality test
##
## data: mtcars$mpg[mtcars$vs == 1]
## W = 0.91166, p-value = 0.1666
# Bonus: F test for equality of the two group variances (defaults spelled out).
var.test(
  x = mtcars$mpg[mtcars$vs == 0],
  y = mtcars$mpg[mtcars$vs == 1],
  ratio = 1,
  alternative = "two.sided",
  conf.level = 0.95
)
##
## F test to compare two variances
##
## data: mtcars$mpg[mtcars$vs == 0] and mtcars$mpg[mtcars$vs == 1]
## F = 0.51515, num df = 17, denom df = 13, p-value = 0.1997
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1714935 1.4353527
## sample estimates:
## ratio of variances
## 0.5151485
#ANSWER: since the p-value (0.0001098) is less than 0.05, we reject the null hypothesis.
#This indicates that the mean mpg values of the v-shaped engine group and the straight engine group are significantly different.
#Furthermore, 0 indicates no difference, and since the 95% confidence interval (-11.46, -4.42) does not contain 0,
#this further indicates that the means of the mpg values for the v-shaped engine and the straight engine are
#significantly different according to this statistical method.
#Q2 (2.5 pt). Use a 2-sample test to determine if there is a
#significant difference between the horsepower (variable hp) of
#the two groups based on their engine configurations
#(vs = 0 and vs = 1). Use a 5% level of significance.
# Q2: compare horsepower (hp) between engine configurations
# (vs: 0 = V-shaped, 1 = straight).
data("mtcars")
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# hp values for each engine configuration.
v_shaped_engine <- mtcars[mtcars$vs == 0, "hp"]
straight_engine <- mtcars[mtcars$vs == 1, "hp"]
head(v_shaped_engine)
## [1] 110 110 175 245 180 180
head(straight_engine)
## [1] 93 110 105 62 95 123
# Two-sample t-test at the 5% level. The named arguments restate t.test()'s
# defaults (two-sided alternative, Welch test, 95% confidence interval).
t.test(
  x = v_shaped_engine,
  y = straight_engine,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: v_shaped_engine and straight_engine
## t = 6.2908, df = 23.561, p-value = 1.82e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 66.06161 130.66854
## sample estimates:
## mean of x mean of y
## 189.72222 91.35714
# Bonus: Shapiro-Wilk normality check for each group.
shapiro.test(x = mtcars$hp[mtcars$vs == 0])
##
## Shapiro-Wilk normality test
##
## data: mtcars$hp[mtcars$vs == 0]
## W = 0.9578, p-value = 0.5598
shapiro.test(x = mtcars$hp[mtcars$vs == 1])
##
## Shapiro-Wilk normality test
##
## data: mtcars$hp[mtcars$vs == 1]
## W = 0.89932, p-value = 0.1102
# Bonus: F test for equality of the two group variances (defaults spelled out).
var.test(
  x = mtcars$hp[mtcars$vs == 0],
  y = mtcars$hp[mtcars$vs == 1],
  ratio = 1,
  alternative = "two.sided",
  conf.level = 0.95
)
##
## F test to compare two variances
##
## data: mtcars$hp[mtcars$vs == 0] and mtcars$hp[mtcars$vs == 1]
## F = 6.0914, num df = 17, denom df = 13, p-value = 0.002002
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 2.027836 16.972425
## sample estimates:
## ratio of variances
## 6.091408
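#ANSWER: since the p-value (1.82e-06) is less than 0.05, we reject the null hypothesis.
#This indicates that the mean hp values of the v-shaped engine group and the straight engine group are significantly different.
#Furthermore, 0 indicates no difference, and since the 95% confidence interval (66.06, 130.67) does not contain 0,
#this further indicates that the means of the hp values for the v-shaped engine and the straight engine are
#significantly different according to this statistical method.
#Note: the F test above (p-value = 0.002002) indicates the two groups have unequal variances; the Welch
#two-sample t-test used here does not assume equal variances, so the conclusion still stands.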
#Q3 (2.5 pt). Use a 2-sample test to determine if there is a
#significant difference between the weight (variable wt) of the
#two groups based on their engine configurations (vs = 0 and vs = 1).
#Use a 5% level of significance.
# Q3: compare weight (wt) between engine configurations
# (vs: 0 = V-shaped, 1 = straight).
data("mtcars")
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# wt values for each engine configuration.
v_shaped_engine <- mtcars[mtcars$vs == 0, "wt"]
straight_engine <- mtcars[mtcars$vs == 1, "wt"]
head(v_shaped_engine)
## [1] 2.620 2.875 3.440 3.570 4.070 3.730
head(straight_engine)
## [1] 2.320 3.215 3.460 3.190 3.150 3.440
# Two-sample t-test at the 5% level. The named arguments restate t.test()'s
# defaults (two-sided alternative, Welch test, 95% confidence interval).
t.test(
  x = v_shaped_engine,
  y = straight_engine,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: v_shaped_engine and straight_engine
## t = 3.7638, df = 29.981, p-value = 0.0007281
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.4927181 1.6618216
## sample estimates:
## mean of x mean of y
## 3.688556 2.611286
# Bonus: Shapiro-Wilk normality check for each group.
shapiro.test(x = mtcars$wt[mtcars$vs == 0])
##
## Shapiro-Wilk normality test
##
## data: mtcars$wt[mtcars$vs == 0]
## W = 0.91225, p-value = 0.09428
shapiro.test(x = mtcars$wt[mtcars$vs == 1])
##
## Shapiro-Wilk normality test
##
## data: mtcars$wt[mtcars$vs == 1]
## W = 0.89879, p-value = 0.1083
# Bonus: F test for equality of the two group variances (defaults spelled out).
var.test(
  x = mtcars$wt[mtcars$vs == 0],
  y = mtcars$wt[mtcars$vs == 1],
  ratio = 1,
  alternative = "two.sided",
  conf.level = 0.95
)
##
## F test to compare two variances
##
## data: mtcars$wt[mtcars$vs == 0] and mtcars$wt[mtcars$vs == 1]
## F = 1.5986, num df = 17, denom df = 13, p-value = 0.3958
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5321785 4.4541869
## sample estimates:
## ratio of variances
## 1.598609
#ANSWER: since the p-value (0.0007281) is less than 0.05, we reject the null hypothesis.
#This indicates that the mean wt values of the v-shaped engine group and the straight engine group are significantly different.
#Furthermore, 0 indicates no difference, and since the 95% confidence interval (0.49, 1.66) does not contain 0,
#this further indicates that the means of the wt values for the v-shaped engine and the straight engine are
#significantly different according to this statistical method.
#Q4 (2.5 pt). Use a 2-sample test to determine if there is a
#significant difference between the qsec (1/4 mile time) of the
#two groups based on their engine configurations (vs = 0 and vs = 1).
#Use a 5% level of significance.
#Note. qsec = The time to travel 1/4 mile from a standstill.
# Q4: compare quarter-mile time (qsec) between engine configurations
# (vs: 0 = V-shaped, 1 = straight).
data("mtcars")
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# qsec values for each engine configuration.
v_shaped_engine <- mtcars[mtcars$vs == 0, "qsec"]
straight_engine <- mtcars[mtcars$vs == 1, "qsec"]
head(v_shaped_engine)
## [1] 16.46 17.02 17.02 15.84 17.40 17.60
head(straight_engine)
## [1] 18.61 19.44 20.22 20.00 22.90 18.30
# Two-sample t-test at the 5% level. The named arguments restate t.test()'s
# defaults (two-sided alternative, Welch test, 95% confidence interval).
t.test(
  x = v_shaped_engine,
  y = straight_engine,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: v_shaped_engine and straight_engine
## t = -5.9433, df = 24.648, p-value = 3.522e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.555083 -1.724282
## sample estimates:
## mean of x mean of y
## 16.69389 19.33357
# Bonus: Shapiro-Wilk normality check for each group.
shapiro.test(x = mtcars$qsec[mtcars$vs == 0])
##
## Shapiro-Wilk normality test
##
## data: mtcars$qsec[mtcars$vs == 0]
## W = 0.90078, p-value = 0.05928
shapiro.test(x = mtcars$qsec[mtcars$vs == 1])
##
## Shapiro-Wilk normality test
##
## data: mtcars$qsec[mtcars$vs == 1]
## W = 0.89527, p-value = 0.0963
# Bonus: F test for equality of the two group variances (defaults spelled out).
var.test(
  x = mtcars$qsec[mtcars$vs == 0],
  y = mtcars$qsec[mtcars$vs == 1],
  ratio = 1,
  alternative = "two.sided",
  conf.level = 0.95
)
##
## F test to compare two variances
##
## data: mtcars$qsec[mtcars$vs == 0] and mtcars$qsec[mtcars$vs == 1]
## F = 0.64992, num df = 17, denom df = 13, p-value = 0.4002
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2163586 1.8108615
## sample estimates:
## ratio of variances
## 0.6499187
#ANSWER: since the p-value (3.522e-06) is less than 0.05, we reject the null hypothesis.
#This indicates that the mean qsec values of the v-shaped engine group and the straight engine group are significantly different.
#Furthermore, 0 indicates no difference, and since the 95% confidence interval (-3.56, -1.72) does not contain 0,
#this further indicates that the means of the qsec values for the v-shaped engine and the straight engine are
#significantly different according to this statistical method.
#Optional (2 pt; bonus): For each of the questions:
#Check if the underlying distributions are normally distributed.
#Check if the distributions under comparison have equal/similar variances.
#(Even if they don't, go ahead with t.test() above.)
#Optional (1 pt; bonus). Visualization of mtcars.
#Analyze the mtcars dataset through data visualization.
#It's up to you. Your goal is to create informative graphics/plots to effectively
#explore and illustrate the dataset's key features and relationships.
#Feel free to use AI to help. Google, e.g., "beautiful and effective visualization on mtcars".
#Try a variety of AI tools (Copilot, Gemini, ChatGPT,
#Perplexity.ai--they all offer FREE accounts; in fact, with your UIS account, you have access to Copilot & Gemini).
#Some will be more helpful than others.
#Try asking one tool for prompts (for this question), and use it in another AI tool.