Q :-

A college is trying to determine if there is a significant difference in the mean GMAT score of students from different undergraduate backgrounds who apply to the MBA program. The Excel file GMAT Scores contains data from a sample of students. What conclusion can be reached using ANOVA?

# One-way ANOVA: do mean GMAT scores differ between undergraduate majors?
library(readxl)

# Load the two-column cell range from the first worksheet of the workbook.
df <- read_excel(
  "/home/student/RCodes/Stats With R/Data_Files/New.xlsx",
  sheet = 1,
  range = "A4:B42"
)

# Model Scores with Major as the single grouping factor.
av <- aov(formula = Scores ~ Major, data = df)

# Overall F test for equality of the group means.
anova(av)
## Analysis of Variance Table
## 
## Response: Scores
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## Major      2 12180.9  6090.4  306.93 < 2.2e-16 ***
## Residuals 35   694.5    19.8                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Pairwise differences between majors with family-wise 95% intervals.
TukeyHSD(av)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Scores ~ Major, data = df)
## 
## $Major
##                        diff       lwr       upr p adj
## Liberal Arts-Business -16.0 -21.11069 -10.88931     0
## Sciences-Business      28.5  24.53213  32.46787     0
## Sciences-Liberal Arts  44.5  39.64410  49.35590     0

H0: All the group means are equal.
- $\mu_B = \mu_{LA} = \mu_S$
H1: At least one $\mu_j$ is different from the others.

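Result: The p-value (< 2.2e-16) is below 0.05, so we reject H0 at the 5% level of significance.
Conclusion: The mean GMAT scores are not all equal across majors; the Tukey comparisons show that every pair of majors differs significantly (all adjusted p-values are approximately 0).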

Q30 :-

Using the data in the Excel file Cell Phone Survey, apply ANOVA to determine if the mean response for Value for the Dollar is the same for different types of cell phones.

# One-way ANOVA: is the mean "Value for the Dollar" rating the same across
# cell phone types?
library(readxl)

# Load the survey cell range from the first worksheet of the workbook.
df2 <- read_excel(
  "/home/student/RCodes/Stats With R/Data_Files/Cell Phone Survey.xlsx",
  sheet = 1,
  range = "A3:G55"
)

# The response name contains spaces, so it must stay backtick-quoted.
av2 <- aov(formula = `Value for the Dollar` ~ Type, data = df2)

# Overall F test; the recorded p-value (0.05345) is just above 0.05.
anova(av2)
## Analysis of Variance Table
## 
## Response: Value for the Dollar
##           Df Sum Sq Mean Sq F value  Pr(>F)  
## Type       2  5.261 2.63061  3.1112 0.05345 .
## Residuals 49 41.431 0.84553                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# One-way ANOVA on the built-in InsectSprays data: insect count by spray type.
data(InsectSprays)

# Show the column names available for the model formula.
colnames(InsectSprays)
## [1] "count" "spray"

# Model count with spray as the single grouping factor.
av3 <- aov(formula = count ~ spray, data = InsectSprays)

# Overall F test for equality of the spray means.
anova(av3)
## Analysis of Variance Table
## 
## Response: count
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## spray      5 2668.8  533.77  34.702 < 2.2e-16 ***
## Residuals 66 1015.2   15.38                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# All pairwise spray comparisons with family-wise 95% intervals.
TukeyHSD(av3)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = count ~ spray, data = InsectSprays)
## 
## $spray
##            diff        lwr       upr     p adj
## B-A   0.8333333  -3.866075  5.532742 0.9951810
## C-A -12.4166667 -17.116075 -7.717258 0.0000000
## D-A  -9.5833333 -14.282742 -4.883925 0.0000014
## E-A -11.0000000 -15.699409 -6.300591 0.0000000
## F-A   2.1666667  -2.532742  6.866075 0.7542147
## C-B -13.2500000 -17.949409 -8.550591 0.0000000
## D-B -10.4166667 -15.116075 -5.717258 0.0000002
## E-B -11.8333333 -16.532742 -7.133925 0.0000000
## F-B   1.3333333  -3.366075  6.032742 0.9603075
## D-C   2.8333333  -1.866075  7.532742 0.4920707
## E-C   1.4166667  -3.282742  6.116075 0.9488669
## F-C  14.5833333   9.883925 19.282742 0.0000000
## E-D  -1.4166667  -6.116075  3.282742 0.9488669
## F-D  11.7500000   7.050591 16.449409 0.0000000
## F-E  13.1666667   8.467258 17.866075 0.0000000

Two-way ANOVA

# Two-way ANOVA without an interaction term: grocery price explained by
# store and by item.
# `header = TRUE` is spelled out: `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
groc <- read.csv("/home/student/RCodes/Stats With R/Datasets/Grocery.csv",header = TRUE)
# NOTE(review): the `groc$` prefixes are kept on purpose. The term name passed
# to TukeyHSD() below and the labels in the recorded output depend on them.
# Switching to `aov(Price ~ Store + Item, data = groc)` would be more idiomatic
# but requires updating that term name and re-running the output.
anl <- aov(groc$Price ~ groc$Store + groc$Item)

# F tests for the Store and Item effects.
summary(anl)
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## groc$Store   2   2.73   1.364   5.643 0.00873 ** 
## groc$Item   14 217.24  15.517  64.182 < 2e-16 ***
## Residuals   28   6.77   0.242                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise store comparisons only; the term name must match the formula text.
TukeyHSD(anl,"groc$Store")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = groc$Price ~ groc$Store + groc$Item)
## 
## $`groc$Store`
##                        diff        lwr        upr     p adj
## Wal-Mart-Tops    -0.6026667 -1.0469139 -0.1584194 0.0062620
## Wegmans-Tops     -0.2806667 -0.7249139  0.1635806 0.2779414
## Wegmans-Wal-Mart  0.3220000 -0.1222473  0.7662473 0.1901506
# Compare weekly hours online between genders in the Facebook Survey data,
# first with a t-test and then with a rank-based test.
library(readxl)
# No sheet argument is given here, only the cell range to read.
dffb <- read_excel("/home/student/RCodes/Stats With R/Data_Files/Facebook Survey.xlsx",range = "A3:D36")
# Two-sample t-test by Gender; the output header shows this is the Welch
# (unequal-variance) variant.
t.test(`Hours online/week` ~ Gender,data = dffb)
## 
##  Welch Two Sample t-test
## 
## data:  Hours online/week by Gender
## t = -0.20654, df = 25.576, p-value = 0.838
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.571496  2.102266
## sample estimates:
## mean in group female   mean in group male 
##             6.150000             6.384615
# Wilcoxon rank-sum test on the same grouping. The warning below reports that
# ties prevent an exact p-value, so the continuity-corrected version is shown.
wilcox.test(`Hours online/week` ~ Gender, data = dffb)
## Warning in wilcox.test.default(x = c(4, 10, 9, 5, 7, 12, 2, 6, 2, 6, 8, :
## cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Hours online/week by Gender
## W = 129, p-value = 0.9852
## alternative hypothesis: true location shift is not equal to 0

shapiro.test()

# Q20: normality checks on two Cell Phone Survey ratings, followed by
# rank-based comparisons of each rating between genders.
cps <- read_excel("/home/student/RCodes/Stats With R/Data_Files/Cell Phone Survey.xlsx",range = "A3:G55")
# Shapiro-Wilk normality test on each rating. Both recorded p-values are far
# below 0.05, which is presumably why Wilcoxon tests are used afterwards.
shapiro.test(cps$`Value for the Dollar`)
## 
##  Shapiro-Wilk normality test
## 
## data:  cps$`Value for the Dollar`
## W = 0.88961, p-value = 0.0001674
shapiro.test(cps$`Customer Service`)
## 
##  Shapiro-Wilk normality test
## 
## data:  cps$`Customer Service`
## W = 0.89059, p-value = 0.0001801
# Wilcoxon rank-sum test of each rating by Gender. The `cps$` prefixes appear
# verbatim in the printed `data:` lines below.
wilcox.test(cps$`Value for the Dollar` ~ cps$Gender)
## Warning in wilcox.test.default(x = c(2, 3, 5, 5, 3, 3, 5, 3, 3, 4, 4, 2, :
## cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  cps$`Value for the Dollar` by cps$Gender
## W = 290, p-value = 0.7519
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(cps$`Customer Service` ~ cps$Gender)
## Warning in wilcox.test.default(x = c(3, 4, 4, 4, 2, 3, 3, 3, 3, 1, 3, 3, :
## cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  cps$`Customer Service` by cps$Gender
## W = 232.5, p-value = 0.135
## alternative hypothesis: true location shift is not equal to 0

A producer of computer-aided design software for the aerospace industry receives numerous calls for technical support. Tracking software is used to monitor response and resolution times. In addition, the company surveys customers who request support using the following scale:
- 0—did not exceed expectations
- 1—marginally met expectations
- 2—met expectations
- 3—exceeded expectations
- 4—greatly exceeded expectations.
The questions are as follows:
Q1: Did the support representative explain the process for resolving your problem?
Q2: Did the support representative keep you informed about the status of progress in resolving your problem?
Q3: Was the support representative courteous and professional?
Q4: Was your problem resolved?
Q5: Was your problem resolved in an acceptable amount of time?
Q6: Overall, how did you find the service provided by our technical support department?

A final question asks the customer to rate the overall quality of the product using a scale of 0—very poor; 1—poor; 2—good; 3—very good; 4—excellent. A sample of survey responses and associated resolution and response data are provided in the Excel file Customer Support Survey.
a. The company has set a service standard of 1 day for the mean resolution time. Does evidence exist that the response time is more than 1 day? How do the outliers in the data affect your result? What should you do about them?
b. Test the hypothesis that the average service index is equal to the average engineer index.

# Q24: is the resolution time greater than the 1-day service standard, and
# are the service and engineer indices normally distributed?
css <- read_excel("/home/student/RCodes/Stats With R/Data_Files/Customer Support Survey.xlsx",range = "A3:K47")
# Shapiro-Wilk normality test on resolution time. The recorded p-value is
# about 8.7e-12, so a sign test on the median is run before any t-test.
shapiro.test(css$`Resolution Time (Days)`)
## 
##  Shapiro-Wilk normality test
## 
## data:  css$`Resolution Time (Days)`
## W = 0.43603, p-value = 8.704e-12
library(signmedian.test)
# One-sided sign test: is the median resolution time greater than 1 day?
# `alternative` is spelled out in full instead of relying on partial matching
# of the abbreviation 'g'.
signmedian.test(css$`Resolution Time (Days)`,mu = 1,alternative = "greater")
## 
##  Exact sign test
## 
## data:  css$`Resolution Time (Days)`
## #(x>1) = 16, mu = 1, p-value = 0.9756
## alternative hypothesis: the median of x is greater than mu
## 95.12332 percent confidence interval:
##  0.05 1.06
## sample estimates:
## point estimator 
##           0.225
# Keep only resolution times under 20 days, i.e. drop the extreme values
# before testing the mean.
# NOTE(review): the 20-day cutoff is hard-coded; confirm it matches the
# intended outlier rule.
resolve <- css$`Resolution Time (Days)`[css$`Resolution Time (Days)` < 20]
mean(resolve)
## [1] 1.55075
# One-sided, one-sample t-test of mean > 1 day on the trimmed data.
t.test(resolve,mu = 1,alternative = "greater")
## 
##  One Sample t-test
## 
## data:  resolve
## t = 1.3367, df = 39, p-value = 0.09453
## alternative hypothesis: true mean is greater than 1
## 95 percent confidence interval:
##  0.8565562       Inf
## sample estimates:
## mean of x 
##   1.55075
# Part (b) preparation: normality checks on the two indices. No test comparing
# the indices appears in this section.
shapiro.test(css$`Service Index`)
## 
##  Shapiro-Wilk normality test
## 
## data:  css$`Service Index`
## W = 0.88417, p-value = 0.0003677
shapiro.test(css$`Engineer Index`)
## 
##  Shapiro-Wilk normality test
## 
## data:  css$`Engineer Index`
## W = 0.89072, p-value = 0.0005712

Q25

Using the data in the Excel file Ohio Education Performance, test the hypotheses that the mean difference in writing and reading scores is zero and that the mean difference in math and science scores is zero. Use the paired-sample procedure.

# Q25: load the Ohio Education Performance data and check the Reading and
# Writing scores for normality before the paired comparison.
css2 <- read_excel("/home/student/RCodes/Stats With R/Data_Files/Ohio Education Performance.xlsx",range = "A3:G34")
# Shapiro-Wilk normality test on each score column.
# NOTE(review): a paired t-test assumes the paired differences are normal;
# consider also testing the Writing - Reading differences directly.
shapiro.test(css2$Reading)
## 
##  Shapiro-Wilk normality test
## 
## data:  css2$Reading
## W = 0.95932, p-value = 0.2798
shapiro.test(css2$Writing)
## 
##  Shapiro-Wilk normality test
## 
## data:  css2$Writing
## W = 0.88569, p-value = 0.003226