#Eva Collins
#MAT 499 SP 25
#Notes from 04 e on canvas:
# t.test() 2-Sample test mtcars$mpg (am = 0 or 1)
# Load the built-in mtcars data set and preview its first rows.
data(mtcars)
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Split mpg into two vectors according to the am flag (am = 0 and am = 1).
automatic_mpg <- with(mtcars, mpg[am == 0])
manual_mpg <- with(mtcars, mpg[am == 1])
# Preview the first values of each group to confirm the split.
head(automatic_mpg)
## [1] 21.4 18.7 18.1 14.3 24.4 22.8
head(manual_mpg)
## [1] 21.0 21.0 22.8 32.4 30.4 33.9
# Two-sample t-test on the two mpg vectors, using the default arguments.
t.test(automatic_mpg, manual_mpg)
## 
##  Welch Two Sample t-test
## 
## data:  automatic_mpg and manual_mpg
## t = -3.7671, df = 18.332, p-value = 0.001374
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -11.280194  -3.209684
## sample estimates:
## mean of x mean of y 
##  17.14737  24.39231
#p-value: This is smaller than 0.05. Hence we reject the null hypothesis. 
#That is, with a p-value of 0.0014, which is smaller than the significance 
#level of 0.05, we reject the null hypothesis. 
#This suggests that there is sufficient statistical evidence to conclude 
#that the means of the two groups (automatic transmission and manual 
#transmission) are different.
#Note: The confidence interval (-11.28, -3.21) is for the 
#difference in means. It does not contain 0 (a difference of 0 would mean no difference). 
#This is in accordance with the p-value, indicating a significant 
#difference at the 5% level.

#HW02
#Consider the dataset mtcars. It has a feature/variable called vs (for engine configuration). 
#vs takes two possible values (0 or 1).
#vs = 0 indicates a V-shaped engine
#vs = 1 indicates a straight engine
#To get some ideas: V vs straight
#FYI. The cylinder configuration affects power, torque, vehicle balance, etc. (hence overall performance).
#We want to use t.test() to explore the differences of cars based on their engine configurations.

#Q1 (2.5 pt). Use a 2-sample test to determine if there is a 
#significant difference between the mpg of the two groups based 
#on their engine configurations (vs = 0 and vs = 1). 
#Use a 5% level of significance.
# Load the built-in mtcars data set and preview its first rows.
data(mtcars)
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# mpg values for each engine configuration (vs = 0 and vs = 1).
v_shaped_engine <- with(mtcars, mpg[vs == 0])
straight_engine <- with(mtcars, mpg[vs == 1])
# Preview the first values of each group.
head(v_shaped_engine)
## [1] 21.0 21.0 18.7 14.3 16.4 17.3
head(straight_engine)
## [1] 22.8 21.4 18.1 24.4 22.8 19.2
# Two-sample t-test comparing mean mpg between the two groups.
t.test(v_shaped_engine, straight_engine)
## 
##  Welch Two Sample t-test
## 
## data:  v_shaped_engine and straight_engine
## t = -4.6671, df = 22.716, p-value = 0.0001098
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -11.462508  -4.418445
## sample estimates:
## mean of x mean of y 
##  16.61667  24.55714
# Bonus: Shapiro-Wilk normality test on the mpg values of each group.
shapiro.test(mtcars$mpg[mtcars$vs == 0])
## 
##  Shapiro-Wilk normality test
## 
## data:  mtcars$mpg[mtcars$vs == 0]
## W = 0.95151, p-value = 0.4491
shapiro.test(mtcars$mpg[mtcars$vs == 1])
## 
##  Shapiro-Wilk normality test
## 
## data:  mtcars$mpg[mtcars$vs == 1]
## W = 0.91166, p-value = 0.1666
# Bonus: F test comparing the variances of the two groups.
var.test(mtcars$mpg[mtcars$vs == 0], mtcars$mpg[mtcars$vs == 1])
## 
##  F test to compare two variances
## 
## data:  mtcars$mpg[mtcars$vs == 0] and mtcars$mpg[mtcars$vs == 1]
## F = 0.51515, num df = 17, denom df = 13, p-value = 0.1997
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.1714935 1.4353527
## sample estimates:
## ratio of variances 
##          0.5151485
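#ANSWER: Since the t-test p-value (0.0001098) is less than 0.05, we reject the null hypothesis.
#This indicates that the mean mpg of the V-shaped engine group (16.62) and the straight engine group (24.56) are significantly different.
#Furthermore, a difference of 0 would indicate no difference, and since the 95% confidence interval (-11.46, -4.42) does not contain 0, 
#this further indicates that the means of the mpg values for the V-shaped engine and the straight engine are 
#significantly different according to this statistical method.
#Bonus checks: both Shapiro-Wilk p-values (0.4491 and 0.1666) are greater than 0.05, so normality is not rejected for either group,
#and the F test p-value (0.1997) is greater than 0.05, so there is no evidence that the two variances differ.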

#Q2 (2.5 pt). Use a 2-sample test to determine if there is a 
#significant difference between the horsepower (variable hp) of
#the two groups based on their engine configurations 
#(vs = 0 and vs = 1). Use a 5% level of significance.
# Load the built-in mtcars data set and preview its first rows.
data(mtcars)
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# hp values for each engine configuration (vs = 0 and vs = 1).
# Note: these names are reassigned here and now hold hp, not mpg.
v_shaped_engine <- with(mtcars, hp[vs == 0])
straight_engine <- with(mtcars, hp[vs == 1])
# Preview the first values of each group.
head(v_shaped_engine)
## [1] 110 110 175 245 180 180
head(straight_engine)
## [1]  93 110 105  62  95 123
# Two-sample t-test comparing mean hp between the two groups.
t.test(v_shaped_engine, straight_engine)
## 
##  Welch Two Sample t-test
## 
## data:  v_shaped_engine and straight_engine
## t = 6.2908, df = 23.561, p-value = 1.82e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   66.06161 130.66854
## sample estimates:
## mean of x mean of y 
## 189.72222  91.35714
# Bonus: Shapiro-Wilk normality test on the hp values of each group.
shapiro.test(mtcars$hp[mtcars$vs == 0])
## 
##  Shapiro-Wilk normality test
## 
## data:  mtcars$hp[mtcars$vs == 0]
## W = 0.9578, p-value = 0.5598
shapiro.test(mtcars$hp[mtcars$vs == 1])
## 
##  Shapiro-Wilk normality test
## 
## data:  mtcars$hp[mtcars$vs == 1]
## W = 0.89932, p-value = 0.1102
# Bonus: F test comparing the variances of the two groups.
var.test(mtcars$hp[mtcars$vs == 0], mtcars$hp[mtcars$vs == 1])
## 
##  F test to compare two variances
## 
## data:  mtcars$hp[mtcars$vs == 0] and mtcars$hp[mtcars$vs == 1]
## F = 6.0914, num df = 17, denom df = 13, p-value = 0.002002
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   2.027836 16.972425
## sample estimates:
## ratio of variances 
##           6.091408
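#ANSWER: Since the t-test p-value (1.82e-06) is less than 0.05, we reject the null hypothesis.
#This indicates that the mean hp of the V-shaped engine group (189.72) and the straight engine group (91.36) are significantly different.
#Furthermore, a difference of 0 would indicate no difference, and since the 95% confidence interval (66.06, 130.67) does not contain 0, 
#this further indicates that the means of the hp values for the V-shaped engine and the straight engine are 
#significantly different according to this statistical method.
#Bonus checks: both Shapiro-Wilk p-values (0.5598 and 0.1102) are greater than 0.05, so normality is not rejected for either group.
#The F test p-value (0.002002) is less than 0.05, so the two variances differ; the Welch test reported above
#does not assume equal variances, so the conclusion still stands.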

#Q3 (2.5 pt). Use a 2-sample test to determine if there is a 
#significant difference between the weight (variable wt) of the 
#two groups based on their engine configurations (vs = 0 and vs = 1). 
#Use a 5% level of significance.
# Load the built-in mtcars data set and preview its first rows.
data(mtcars)
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# wt values for each engine configuration (vs = 0 and vs = 1).
# Note: these names are reassigned here and now hold wt.
v_shaped_engine <- with(mtcars, wt[vs == 0])
straight_engine <- with(mtcars, wt[vs == 1])
# Preview the first values of each group.
head(v_shaped_engine)
## [1] 2.620 2.875 3.440 3.570 4.070 3.730
head(straight_engine)
## [1] 2.320 3.215 3.460 3.190 3.150 3.440
# Two-sample t-test comparing mean wt between the two groups.
t.test(v_shaped_engine, straight_engine)
## 
##  Welch Two Sample t-test
## 
## data:  v_shaped_engine and straight_engine
## t = 3.7638, df = 29.981, p-value = 0.0007281
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.4927181 1.6618216
## sample estimates:
## mean of x mean of y 
##  3.688556  2.611286
# Bonus: Shapiro-Wilk normality test on the wt values of each group.
shapiro.test(mtcars$wt[mtcars$vs == 0])
## 
##  Shapiro-Wilk normality test
## 
## data:  mtcars$wt[mtcars$vs == 0]
## W = 0.91225, p-value = 0.09428
shapiro.test(mtcars$wt[mtcars$vs == 1])
## 
##  Shapiro-Wilk normality test
## 
## data:  mtcars$wt[mtcars$vs == 1]
## W = 0.89879, p-value = 0.1083
# Bonus: F test comparing the variances of the two groups.
var.test(mtcars$wt[mtcars$vs == 0], mtcars$wt[mtcars$vs == 1])
## 
##  F test to compare two variances
## 
## data:  mtcars$wt[mtcars$vs == 0] and mtcars$wt[mtcars$vs == 1]
## F = 1.5986, num df = 17, denom df = 13, p-value = 0.3958
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5321785 4.4541869
## sample estimates:
## ratio of variances 
##           1.598609
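#ANSWER: Since the t-test p-value (0.0007281) is less than 0.05, we reject the null hypothesis.
#This indicates that the mean wt of the V-shaped engine group (3.689) and the straight engine group (2.611) are significantly different.
#Furthermore, a difference of 0 would indicate no difference, and since the 95% confidence interval (0.49, 1.66) does not contain 0, 
#this further indicates that the means of the wt values for the V-shaped engine and the straight engine are 
#significantly different according to this statistical method.
#Bonus checks: both Shapiro-Wilk p-values (0.09428 and 0.1083) are greater than 0.05, so normality is not rejected for either group,
#and the F test p-value (0.3958) is greater than 0.05, so there is no evidence that the two variances differ.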

#Q4 (2.5 pt). Use a 2-sample test to determine if there is a 
#significant difference between the qsec (1/4 mile time) of the 
#two groups based on their engine configurations (vs = 0 and vs = 1). 
#Use a 5% level of significance. 
#Note. qsec = The time to travel 1/4 mile from a standstill.
# Load the built-in mtcars data set and preview its first rows.
data(mtcars)
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# qsec values for each engine configuration (vs = 0 and vs = 1).
# Note: these names are reassigned here and now hold qsec.
v_shaped_engine <- with(mtcars, qsec[vs == 0])
straight_engine <- with(mtcars, qsec[vs == 1])
# Preview the first values of each group.
head(v_shaped_engine)
## [1] 16.46 17.02 17.02 15.84 17.40 17.60
head(straight_engine)
## [1] 18.61 19.44 20.22 20.00 22.90 18.30
# Two-sample t-test comparing mean qsec between the two groups.
t.test(v_shaped_engine, straight_engine)
## 
##  Welch Two Sample t-test
## 
## data:  v_shaped_engine and straight_engine
## t = -5.9433, df = 24.648, p-value = 3.522e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.555083 -1.724282
## sample estimates:
## mean of x mean of y 
##  16.69389  19.33357
# Bonus: Shapiro-Wilk normality test on the qsec values of each group.
shapiro.test(mtcars$qsec[mtcars$vs == 0])
## 
##  Shapiro-Wilk normality test
## 
## data:  mtcars$qsec[mtcars$vs == 0]
## W = 0.90078, p-value = 0.05928
shapiro.test(mtcars$qsec[mtcars$vs == 1])
## 
##  Shapiro-Wilk normality test
## 
## data:  mtcars$qsec[mtcars$vs == 1]
## W = 0.89527, p-value = 0.0963
# Bonus: F test comparing the variances of the two groups.
var.test(mtcars$qsec[mtcars$vs == 0], mtcars$qsec[mtcars$vs == 1])
## 
##  F test to compare two variances
## 
## data:  mtcars$qsec[mtcars$vs == 0] and mtcars$qsec[mtcars$vs == 1]
## F = 0.64992, num df = 17, denom df = 13, p-value = 0.4002
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2163586 1.8108615
## sample estimates:
## ratio of variances 
##          0.6499187
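#ANSWER: Since the t-test p-value (3.522e-06) is less than 0.05, we reject the null hypothesis.
#This indicates that the mean qsec of the V-shaped engine group (16.69) and the straight engine group (19.33) are significantly different.
#Furthermore, a difference of 0 would indicate no difference, and since the 95% confidence interval (-3.56, -1.72) does not contain 0, 
#this further indicates that the means of the qsec values for the V-shaped engine and the straight engine are 
#significantly different according to this statistical method.
#Bonus checks: both Shapiro-Wilk p-values (0.05928 and 0.0963) are greater than 0.05, so normality is not rejected for either group
#(although the first is close to the cutoff), and the F test p-value (0.4002) is greater than 0.05,
#so there is no evidence that the two variances differ.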

#Optional (2 pt; bonus): For each of the questions:
  
#Check if the underlying distributions are normally distributed.
#Check if the distributions under comparison have equal/similar variances. 
#(Even if they don't, go ahead with t.test() above.)

#Optional (1 pts; bonus). Visualization of mtcars. 

#Analyze the mtcars dataset through data visualization. 
#It's up to you. Your goal is to create informative graphics/plots to effectively 
#explore and illustrate the dataset's key features and relationships. 
#Feel free to use AI to help. Google, e.g., "beautiful and effective visualization on mtcars".
#Try a variety of AI tools (Copilot, Gemini, ChatGPT, 
#Perplexity.ai--they all offer FREE accounts; in fact, with your UIS account, you can access Copilot & Gemini). 
#Some will be more helpful than others. 
#Try asking one tool for prompts (for this question), and use it in another AI tool.