library(readxl)
# Load the Scores/Major data (first sheet, cells A4:B42) and fit a one-way
# ANOVA of Scores on Major; anova() prints the F-test table.
df <- read_excel(
  path = "/home/student/RCodes/Stats With R/Data_Files/New.xlsx",
  sheet = 1,
  range = "A4:B42"
)
av <- aov(formula = Scores ~ Major, data = df)
anova(object = av)
## Analysis of Variance Table
##
## Response: Scores
## Df Sum Sq Mean Sq F value Pr(>F)
## Major 2 12180.9 6090.4 306.93 < 2.2e-16 ***
## Residuals 35 694.5 19.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD pairwise comparisons of the Major means from the fitted ANOVA,
# at the default 95% family-wise confidence level (stated explicitly).
TukeyHSD(av, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Scores ~ Major, data = df)
##
## $Major
## diff lwr upr p adj
## Liberal Arts-Business -16.0 -21.11069 -10.88931 0
## Sciences-Business 28.5 24.53213 32.46787 0
## Sciences-Liberal Arts 44.5 39.64410 49.35590 0
H0: All group means are equal.
- $\mu_B = \mu_{LA} = \mu_S$
H1: At least one $\mu_j$ differs from the others.
Result: We reject H0 at the 5% level of significance.
Conclusion: The mean GMAT scores are not all equal; at least one major's mean differs from the others.
library(readxl)
# Load cells A3:G55 of the first sheet of the cell phone survey workbook and
# fit a one-way ANOVA of `Value for the Dollar` on Type; anova() prints the
# F-test table.
df2 <- read_excel(
  path = "/home/student/RCodes/Stats With R/Data_Files/Cell Phone Survey.xlsx",
  sheet = 1,
  range = "A3:G55"
)
av2 <- aov(formula = `Value for the Dollar` ~ Type, data = df2)
anova(object = av2)
## Analysis of Variance Table
##
## Response: Value for the Dollar
## Df Sum Sq Mean Sq F value Pr(>F)
## Type 2 5.261 2.63061 3.1112 0.05345 .
## Residuals 49 41.431 0.84553
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Built-in InsectSprays data: list its columns, then fit a one-way ANOVA of
# count on spray and print the F-test table.
data(InsectSprays)
colnames(InsectSprays)
## [1] "count" "spray"
av3 <- aov(formula = count ~ spray, data = InsectSprays)
anova(object = av3)
## Analysis of Variance Table
##
## Response: count
## Df Sum Sq Mean Sq F value Pr(>F)
## spray 5 2668.8 533.77 34.702 < 2.2e-16 ***
## Residuals 66 1015.2 15.38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD pairwise comparisons between all spray levels, at the default
# 95% family-wise confidence level (stated explicitly).
TukeyHSD(av3, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = count ~ spray, data = InsectSprays)
##
## $spray
## diff lwr upr p adj
## B-A 0.8333333 -3.866075 5.532742 0.9951810
## C-A -12.4166667 -17.116075 -7.717258 0.0000000
## D-A -9.5833333 -14.282742 -4.883925 0.0000014
## E-A -11.0000000 -15.699409 -6.300591 0.0000000
## F-A 2.1666667 -2.532742 6.866075 0.7542147
## C-B -13.2500000 -17.949409 -8.550591 0.0000000
## D-B -10.4166667 -15.116075 -5.717258 0.0000002
## E-B -11.8333333 -16.532742 -7.133925 0.0000000
## F-B 1.3333333 -3.366075 6.032742 0.9603075
## D-C 2.8333333 -1.866075 7.532742 0.4920707
## E-C 1.4166667 -3.282742 6.116075 0.9488669
## F-C 14.5833333 9.883925 19.282742 0.0000000
## E-D -1.4166667 -6.116075 3.282742 0.9488669
## F-D 11.7500000 7.050591 16.449409 0.0000000
## F-E 13.1666667 8.467258 17.866075 0.0000000
# Additive two-factor ANOVA of grocery Price on Store and Item, followed by
# Tukey HSD comparisons between stores.
# The model uses bare column names with `data = groc` instead of `groc$...`
# terms, so the model terms are labelled "Store" and "Item" and TukeyHSD()
# can refer to the factor by its plain name. The fitted values and test
# statistics are the same; only the term labels in the printed output differ.
groc <- read.csv(
  "/home/student/RCodes/Stats With R/Datasets/Grocery.csv",
  header = TRUE # spelled out: `T` is an ordinary variable and can be reassigned
)
anl <- aov(Price ~ Store + Item, data = groc)
summary(anl)
## Df Sum Sq Mean Sq F value Pr(>F)
## Store 2 2.73 1.364 5.643 0.00873 **
## Item 14 217.24 15.517 64.182 < 2e-16 ***
## Residuals 28 6.77 0.242
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(anl, "Store")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Price ~ Store + Item, data = groc)
##
## $Store
## diff lwr upr p adj
## Wal-Mart-Tops -0.6026667 -1.0469139 -0.1584194 0.0062620
## Wegmans-Tops -0.2806667 -0.7249139 0.1635806 0.2779414
## Wegmans-Wal-Mart 0.3220000 -0.1222473 0.7662473 0.1901506
library(readxl)
# Load cells A3:D36 of the Facebook survey workbook (default sheet) and run a
# two-sample t-test of `Hours online/week` between the Gender groups.
dffb <- read_excel(
  path = "/home/student/RCodes/Stats With R/Data_Files/Facebook Survey.xlsx",
  range = "A3:D36"
)
t.test(
  `Hours online/week` ~ Gender,
  data = dffb,
  var.equal = FALSE # the default, stated explicitly: Welch (unequal-variance) test
)
##
## Welch Two Sample t-test
##
## data: Hours online/week by Gender
## t = -0.20654, df = 25.576, p-value = 0.838
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.571496 2.102266
## sample estimates:
## mean in group female mean in group male
## 6.150000 6.384615
# Wilcoxon rank-sum test of `Hours online/week` between the Gender groups.
wilcox.test(
  `Hours online/week` ~ Gender,
  data = dffb
)
## Warning in wilcox.test.default(x = c(4, 10, 9, 5, 7, 12, 2, 6, 2, 6, 8, :
## cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: Hours online/week by Gender
## W = 129, p-value = 0.9852
## alternative hypothesis: true location shift is not equal to 0
#Q20
# Load cells A3:G55 of the cell phone survey workbook (default sheet) and run
# a Shapiro-Wilk normality test on each of the two rating columns.
cps <- read_excel(
  path = "/home/student/RCodes/Stats With R/Data_Files/Cell Phone Survey.xlsx",
  range = "A3:G55"
)
shapiro.test(x = cps$`Value for the Dollar`)
##
## Shapiro-Wilk normality test
##
## data: cps$`Value for the Dollar`
## W = 0.88961, p-value = 0.0001674
shapiro.test(x = cps$`Customer Service`)
##
## Shapiro-Wilk normality test
##
## data: cps$`Customer Service`
## W = 0.89059, p-value = 0.0001801
# Wilcoxon rank-sum tests comparing each rating column between the Gender
# groups. Columns are named in the formula and resolved through `data = cps`
# (the idiomatic formula interface) instead of embedding `cps$...` in the
# formula. The test statistics are unaffected; only the "data:" label in the
# printed result changes.
wilcox.test(`Value for the Dollar` ~ Gender, data = cps)
## Warning in wilcox.test.default(x = c(2, 3, 5, 5, 3, 3, 5, 3, 3, 4, 4, 2, :
## cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: Value for the Dollar by Gender
## W = 290, p-value = 0.7519
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(`Customer Service` ~ Gender, data = cps)
## Warning in wilcox.test.default(x = c(3, 4, 4, 4, 2, 3, 3, 3, 3, 1, 3, 3, :
## cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: Customer Service by Gender
## W = 232.5, p-value = 0.135
## alternative hypothesis: true location shift is not equal to 0
A producer of computer-aided design software for the aerospace industry receives numerous calls for technical support. Tracking software is used to monitor response and resolution times. In addition, the company surveys customers who request support using the following scale:
- 0—did not exceed expectations
- 1—marginally met expectations
- 2—met expectations
- 3—exceeded expectations
- 4—greatly exceeded expectations.
The questions are as follows:
Q1: Did the support representative explain the process for resolving your problem?
Q2: Did the support representative keep you informed about the status of progress in resolving your problem?
Q3: Was the support representative courteous and professional?
Q4: Was your problem resolved?
Q5: Was your problem resolved in an acceptable amount of time?
Q6: Overall, how did you find the service provided by our technical support department?
A final question asks the customer to rate the overall quality of the product using a scale of 0—very poor; 1—poor; 2—good; 3—very good; 4—excellent. A sample of survey responses and associated resolution and response data are provided in the Excel file Customer Support Survey.
a. The company has set a service standard of 1 day for the mean resolution time. Does evidence exist that the response time is more than 1 day? How do the outliers in the data affect your result? What should you do about them?
b. Test the hypothesis that the average service index is equal to the average engineer index.
#Q24
# Load cells A3:K47 of the customer support survey workbook (default sheet)
# and test `Resolution Time (Days)` for normality with Shapiro-Wilk.
css <- read_excel(
  path = "/home/student/RCodes/Stats With R/Data_Files/Customer Support Survey.xlsx",
  range = "A3:K47"
)
shapiro.test(x = css$`Resolution Time (Days)`)
##
## Shapiro-Wilk normality test
##
## data: css$`Resolution Time (Days)`
## W = 0.43603, p-value = 8.704e-12
library(signmedian.test)
# One-sided sign test on the median of `Resolution Time (Days)` against
# mu = 1; "g" selects the "greater" alternative.
signmedian.test(
  css$`Resolution Time (Days)`,
  mu = 1,
  alternative = "g"
)
##
## Exact sign test
##
## data: css$`Resolution Time (Days)`
## #(x>1) = 16, mu = 1, p-value = 0.9756
## alternative hypothesis: the median of x is greater than mu
## 95.12332 percent confidence interval:
## 0.05 1.06
## sample estimates:
## point estimator
## 0.225
# Keep only resolution times below 20 days, report their mean, and run a
# one-sided one-sample t-test of whether that mean is greater than 1.
resolve <- with(css, `Resolution Time (Days)`[`Resolution Time (Days)` < 20])
mean(x = resolve)
## [1] 1.55075
t.test(
  x = resolve,
  mu = 1,
  alternative = "g" # initial letter of "greater"
)
##
## One Sample t-test
##
## data: resolve
## t = 1.3367, df = 39, p-value = 0.09453
## alternative hypothesis: true mean is greater than 1
## 95 percent confidence interval:
## 0.8565562 Inf
## sample estimates:
## mean of x
## 1.55075
# Shapiro-Wilk normality checks for the two index columns of the customer
# support survey.
shapiro.test(x = css$`Service Index`)
##
## Shapiro-Wilk normality test
##
## data: css$`Service Index`
## W = 0.88417, p-value = 0.0003677
shapiro.test(x = css$`Engineer Index`)
##
## Shapiro-Wilk normality test
##
## data: css$`Engineer Index`
## W = 0.89072, p-value = 0.0005712
# Load cells A3:G34 of the Ohio education performance workbook (default
# sheet) and run Shapiro-Wilk normality tests on the Reading and Writing
# columns.
css2 <- read_excel(
  path = "/home/student/RCodes/Stats With R/Data_Files/Ohio Education Performance.xlsx",
  range = "A3:G34"
)
shapiro.test(x = css2$Reading)
##
## Shapiro-Wilk normality test
##
## data: css2$Reading
## W = 0.95932, p-value = 0.2798
shapiro.test(x = css2$Writing)
##
## Shapiro-Wilk normality test
##
## data: css2$Writing
## W = 0.88569, p-value = 0.003226