# Load the WSES sales-leads data set. The absolute path below is specific to
# the author's machine; text columns are read in as factors.
data1 <- read.csv(
  file = "G:\\IIMK DABS\\R\\Datasets\\wsesdata.csv",
  stringsAsFactors = TRUE,
  sep = ",",
  header = TRUE
)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Shorten the raw CSV column names in one rename() call. Column positions are
# unchanged; only the names differ.
data1 <- data1 %>%
  rename(
    Sl.no = `Opportunity.No.`,
    sales_value = `Sales.Value.in.Million`,
    Profit.of.customer = `Profit.of.Customer.in.Million`,
    Relative.Strength = `Relative.Strength.in.the.segment`,
    WSES.Proportion = `WSES.Proportion.in.Joint.Bid`,
    Profit = `Profit..`,
    Reporting.status = `Reporting.Status`
  )
# Re-encode the categorical columns. Product becomes an ordered factor with an
# explicit level order; the last two entries append factor copies of the
# status / conversion-class columns under names that contain spaces.
data1 <- data1 %>%
  mutate(
    Product = factor(
      Product,
      levels = c("GTMSys", "Procsys", "LearnSys", "Finsys", "Lifesys", "Logissys", "ContactSys"),
      ordered = TRUE
    ),
    Industry = factor(Industry),
    Region = factor(Region),
    `Reporting Status` = factor(Reporting.status),
    `Leads Conversion Class` = factor(Leads.Conversion.Class)
  )
# Question 1
#1. The marketing team of WSES believes that the average sales value of the leads that they receive is at least 8 million dollars.
#The marketing team believes that the standard deviation of sale value is about 2 million dollars.
#Use an appropriate hypothesis test to check whether the average sales value in the population is at least 8 million dollars.
# One-sample z test of the mean sales value against 8 (million), using the
# marketing team's assumed population standard deviation of 2.
mean_sales.value <- mean(data1$sales_value)
# Take the sample size from the data instead of hard-coding 1000, so the
# standard error stays correct if the data set changes.
std.error <- 2 / sqrt(length(data1$sales_value))
z_statistc <- (mean_sales.value - 8) / std.error
# Lower-tail probability P(Z <= z).
pnorm(z_statistc)
## [1] 0.7576813
# Upper-tail probability P(Z > z), i.e. the p-value for Ha: mean > 8.
# Computed directly so that no variable named `pnorm` shadows stats::pnorm().
pnorm(z_statistc, lower.tail = FALSE)
## [1] 0.2423187
## Question 2
#QUESTION 2
#Jason McCullagh, senior marketing manager at WSES doubted the value of standard deviation provided by the marketing team.
#Jason argued that there is no way the marketing team could have known the population standard deviation for the sales value,
#since the population itself is unknown. Do you agree with Jason McCullagh? If yes, perform the test again using an appropriate hypothesis test.
# One-sample t test (population standard deviation unknown) of
# Ha: mean sales value > 8.
t.test_value <- t.test(data1$sales_value, mu = 8, alternative = "greater")
# Show the stored result: an assignment on its own prints nothing, so the
# answer to Question 2 would otherwise never appear in the output.
print(t.test_value)
#Question 3: Prudy Perkins, the Chief Marketing Officer (CMO), informed the board that they win at least 50% of the sales leads that they receive.
#Use an appropriate hypothesis testing procedure to check whether the proportion of leads won by WSES is more than 50%.
#Ho : p <= 0.50, Ha : p >0.50
summary(data1$Reporting.status)
## Lost Won
## 519 481
# Derive the counts from the data rather than retyping them from the summary
# above, so the test cannot drift out of sync with the data set.
n_won <- sum(data1$Reporting.status == "Won", na.rm = TRUE)
n_leads <- sum(!is.na(data1$Reporting.status))
# One-sample proportion test of Ha: p > 0.5.
prop.test(x = n_won, n = n_leads, p = 0.5, alternative = "greater")
##
## 1-sample proportions test with continuity correction
##
## data: n_won out of n_leads, null probability 0.5
## X-squared = 1.369, df = 1, p-value = 0.879
## alternative hypothesis: true p is greater than 0.5
## 95 percent confidence interval:
## 0.4546 1.0000
## sample estimates:
## p
## 0.481
# Question 4
# Question ; Hendry Jackson, who works in the product line "learnsys",
#claims that the probability of winning a sales lead for the product "learnsys"
#is more than that of "Finsys". Is there statistically significant evidence in favor
#of Hendry's claim?
# Keep only the LearnSys and Finsys leads. The filter uses the piped data's own
# Product column instead of reaching back into data1$Product, so it no longer
# depends on data1 having the same row order and length as the piped frame.
data3 <- data1 %>%
  select(Reporting.status, Product) %>%
  filter(Product %in% c("LearnSys", "Finsys"))
# here we are using dplyr package to create a new dataframe with the columns and values that need to be analysed
table(data3)
## Product
## Reporting.status GTMSys Procsys LearnSys Finsys Lifesys Logissys ContactSys
## Lost 0 0 55 83 0 0 0
## Won 0 0 71 34 0 0 0
prop.table(table(data3), margin = 2)
## Product
## Reporting.status GTMSys Procsys LearnSys Finsys Lifesys Logissys ContactSys
## Lost 0.4365079 0.7094017
## Won 0.5634921 0.2905983
# margin is used to calculate either rowwise proportion or columnwise proportion
# margin =1 for rowwise proportion calculation and margin = 2 for columnwise proportion
options(scipen = 100)
summary(data3)
## Reporting.status Product
## Lost:138 GTMSys : 0
## Won :105 Procsys : 0
## LearnSys :126
## Finsys :117
## Lifesys : 0
## Logissys : 0
## ContactSys: 0
# Two-sample proportion test of Ha: P(Won | LearnSys) > P(Won | Finsys).
# x = leads won and n = total leads per product, taken from the tables above.
prop.test(x = c(71, 34), n = c(126, 117), alternative = "greater")
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(71, 34) out of c(126, 117)
## X-squared = 17.316, df = 1, p-value = 0.00001583
## alternative hypothesis: greater
## 95 percent confidence interval:
## 0.1644089 1.0000000
## sample estimates:
## prop 1 prop 2
## 0.5634921 0.2905983
# Comparison btw Learnsys and Lifesys
# Keep only the LearnSys and Lifesys leads. The filter uses the piped data's
# own Product column instead of reaching back into data1$Product.
data4 <- data1 %>%
  select(Reporting.status, Product) %>%
  filter(Product %in% c("LearnSys", "Lifesys"))
table(data4)
## Product
## Reporting.status GTMSys Procsys LearnSys Finsys Lifesys Logissys ContactSys
## Lost 0 0 55 0 58 0 0
## Won 0 0 71 0 54 0 0
prop.table(table(data4), margin = 2)
## Product
## Reporting.status GTMSys Procsys LearnSys Finsys Lifesys Logissys ContactSys
## Lost 0.4365079 0.5178571
## Won 0.5634921 0.4821429
# Two-sample proportion test of Ha: P(Won | LearnSys) > P(Won | Lifesys).
# x = leads won and n = total leads per product, taken from the table above.
prop.test(x = c(71, 54), n = c(126, 112), alternative = "greater")
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(71, 54) out of c(126, 112)
## X-squared = 1.2642, df = 1, p-value = 0.1304
## alternative hypothesis: greater
## 95 percent confidence interval:
## -0.0334461 1.0000000
## sample estimates:
## prop 1 prop 2
## 0.5634921 0.4821429
# Comparison btw UK and Other Europe
# Keep only the UK and Other Europe leads. The filter uses the piped data's own
# Region column instead of reaching back into data1$Region.
data5 <- data1 %>%
  select(Reporting.status, Region) %>%
  filter(Region %in% c("UK", "Other Europe"))
table(data5)
## Region
## Reporting.status Africa Americas Canada India Japan Other Europe Singapore
## Lost 0 0 0 0 0 92 0
## Won 0 0 0 0 0 66 0
## Region
## Reporting.status Spain UK
## Lost 0 286
## Won 0 267
summary(data5)
## Reporting.status Region
## Lost:378 UK :553
## Won :333 Other Europe:158
## Africa : 0
## Americas : 0
## Canada : 0
## India : 0
## (Other) : 0
# Two-sample proportion test of Ha: P(Won | UK) > P(Won | Other Europe).
# x = leads won and n = total leads per region, taken from the tables above.
prop.test(x = c(267, 66), n = c(553, 158), alternative = "greater")
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(267, 66) out of c(553, 158)
## X-squared = 1.8383, df = 1, p-value = 0.08758
## alternative hypothesis: greater
## 95 percent confidence interval:
## -0.01236324 1.00000000
## sample estimates:
## prop 1 prop 2
## 0.4828210 0.4177215
# Question 5: Hendry Jackson also claims that the average sales value of
#"learnsys" projects is higher than that of "Finsys" projects.
#Check whether he is correct at 5% significance.
# Comparing avg sales value of Learnsys and finsys to check if there is a significant difference
# 2 sample T test
# Ho : Avg sales value Learnsys <= avg sales value Finsys
# Ha : Avg sales value Learnsys > avg sales value Finsys
# Column names and per-product lead counts, for reference.
names(data1)
## [1] "Sl.no" "Reporting.status" "Sales.Outcome"
## [4] "Product" "Industry" "Region"
## [7] "Relative.Strength" "Profit.of.customer" "sales_value"
## [10] "Profit" "WSES.Proportion" "Leads.Conversion.Class"
## [13] "Reporting Status" "Leads Conversion Class"
summary(data1$Product)
## GTMSys Procsys LearnSys Finsys Lifesys Logissys ContactSys
## 463 133 126 117 112 29 20
# Sales values of the LearnSys and Finsys leads only.
data6 <- data1 %>%
  filter(Product %in% c("LearnSys", "Finsys")) %>%
  select(Product, sales_value)
# Pooled-variance two-sample t test. With the formula interface the first group
# is LearnSys (see the group labels in the output), so "greater" matches Ha.
t.test(
  sales_value ~ Product,
  data = data6,
  var.equal = TRUE,
  alternative = "greater"
)
##
## Two Sample t-test
##
## data: sales_value by Product
## t = 0.93503, df = 241, p-value = 0.1754
## alternative hypothesis: true difference in means between group LearnSys and group Finsys is greater than 0
## 95 percent confidence interval:
## -0.1728648 Inf
## sample estimates:
## mean in group LearnSys mean in group Finsys
## 8.030476 7.804786
# Equivalent call, passing the two samples explicitly as x and y.
t.test(
  x = data1$sales_value[data1$Product == "LearnSys"],
  y = data1$sales_value[data1$Product == "Finsys"],
  var.equal = TRUE,
  alternative = "greater"
)
##
## Two Sample t-test
##
## data: data1$sales_value[data1$Product == "LearnSys"] and data1$sales_value[data1$Product == "Finsys"]
## t = 0.93503, df = 241, p-value = 0.1754
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -0.1728648 Inf
## sample estimates:
## mean of x mean of y
## 8.030476 7.804786
# Comparing avg sales value of LearnSys and Procsys to check if there is a significant difference
# Ho : avg sales value LearnSys <= avg sales value Procsys
# Ha : avg sales value LearnSys > avg sales value Procsys
# Two Sample T test
data7 <- data1 %>%
  select(Product, sales_value) %>%
  filter(Product %in% c("LearnSys", "Procsys"))
# With the formula interface the groups follow the factor level order, and
# Procsys precedes LearnSys in Product's levels. The tested difference is
# therefore (Procsys - LearnSys), and Ha above (LearnSys > Procsys) corresponds
# to alternative = "less". The earlier alternative = "greater" tested the
# opposite claim, Procsys > LearnSys.
t.test(sales_value ~ Product, data = data7, alternative = "less", var.equal = TRUE)
## NOTE: re-run to regenerate this output. The previous run, made with
## alternative = "greater" on (Procsys - LearnSys), reported
## t = 0.76326, df = 257, p-value = 0.223, with group means
## Procsys 8.215113 and LearnSys 8.030476.
## For the corrected direction the statistic and df are the same and the
## p-value is the complement, about 1 - 0.223 = 0.777, so Ho is not rejected.
# OR: pass LearnSys as x and Procsys as y, so that "greater" tests
# mean(LearnSys) - mean(Procsys) > 0, exactly as Ha states.
t.test(
  x = data7$sales_value[data7$Product == "LearnSys"],
  y = data7$sales_value[data7$Product == "Procsys"],
  alternative = "greater",
  var.equal = TRUE
)
## NOTE: re-run to regenerate this output. By symmetry with the run above,
## expect t = -0.76326, df = 257 and a p-value of about 0.777.
#Question 6: Liz was of the opinion that there is difference in the average profit across
#geographical locations, such as United Kingdom, India and the America.
#Use an appropriate test to verify the same.
#ANOVA Test
# Lead counts per region, for reference.
summary(data1$Region)
## Africa Americas Canada India Japan Other Europe
## 93 104 6 35 16 158
## Singapore Spain UK
## 23 12 553
# Profit for the three regions named in the question.
data8 <- data1 %>%
  filter(Region %in% c("India", "Americas", "UK")) %>%
  select(Region, Profit)
# One-way ANOVA of Profit across those three regions.
# (The result object shares its name with the stats::anova() function.)
anova <- aov(formula = Profit ~ Region, data = data8)
summary(anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## Region 2 297 148.7 1.505 0.223
## Residuals 689 68075 98.8
model.tables(x = anova, type = "means")
## Tables of means
## Grand mean
##
## 50.7052
##
## Region
## Americas India UK
## 50.33 53.51 50.6
## rep 104.00 35.00 553.0
# Same fit and summary as above, written as a single expression.
summary(aov(formula = Profit ~ Region, data = data8))
## Df Sum Sq Mean Sq F value Pr(>F)
## Region 2 297 148.7 1.505 0.223
## Residuals 689 68075 98.8
# One-way ANOVA of Profit across all regions.
anova1 <- aov(formula = Profit ~ Region, data = data1)
summary(anova1)
## Df Sum Sq Mean Sq F value Pr(>F)
## Region 8 1171 146.3 1.428 0.18
## Residuals 991 101556 102.5
model.tables(x = anova1, type = "means")
## Tables of means
## Grand mean
##
## 50.703
##
## Region
## Africa Americas Canada India Japan Other Europe Singapore Spain UK
## 50.97 50.33 43.17 53.51 48.12 50.36 54.61 52.75 50.6
## rep 93.00 104.00 6.00 35.00 16.00 158.00 23.00 12.00 553.0
#ANOVA Relative Strength comparison
# Relative strength and region only.
data9 <- data1[, c("Relative.Strength", "Region")]
# One-way ANOVA of Relative.Strength across all regions.
anova2 <- aov(formula = Relative.Strength ~ Region, data = data9)
summary(anova2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Region 8 17554 2194.2 24.48 <0.0000000000000002 ***
## Residuals 991 88819 89.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.tables(x = anova2, type = "means")
## Tables of means
## Grand mean
##
## 49.605
##
## Region
## Africa Americas Canada India Japan Other Europe Singapore Spain UK
## 45.56 46.45 25.83 44 47.44 51.67 66.39 66 49.91
## rep 93.00 104.00 6.00 35 16.00 158.00 23.00 12 553.00
# Pairwise region comparisons with Tukey's honest significant differences.
TukeyHSD(x = anova2, which = "Region")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Relative.Strength ~ Region, data = data9)
##
## $Region
## diff lwr upr p adj
## Americas-Africa 0.8927833 -3.3073387 5.0929053 0.9992043
## Canada-Africa -19.7258065 -32.1219740 -7.3296389 0.0000312
## India-Africa -1.5591398 -7.3951552 4.2768756 0.9959439
## Japan-Africa 1.8783602 -6.0868772 9.8435976 0.9983146
## Other Europe-Africa 6.1117463 2.2653522 9.9581404 0.0000323
## Singapore-Africa 20.8321646 13.9786941 27.6856350 0.0000000
## Spain-Africa 20.4408602 11.4137345 29.4679860 0.0000000
## UK-Africa 4.3504443 1.0520759 7.6488127 0.0014710
## Canada-Americas -20.6185897 -32.9749621 -8.2622174 0.0000092
## India-Americas -2.4519231 -8.2029268 3.2990806 0.9237556
## Japan-Americas 0.9855769 -6.9175854 8.8887393 0.9999859
## Other Europe-Americas 5.2189630 1.5028204 8.9351056 0.0004745
## Singapore-Americas 19.9393813 13.1581554 26.7206072 0.0000000
## Spain-Americas 19.5480769 10.5756766 28.5204772 0.0000000
## UK-Americas 3.4576610 0.3121559 6.6031661 0.0189272
## India-Canada 18.1666667 5.1628984 31.1704349 0.0005247
## Japan-Canada 21.6041667 7.5157345 35.6925988 0.0000750
## Other Europe-Canada 25.8375527 13.5968967 38.0782088 0.0000000
## Singapore-Canada 40.5579710 27.0669010 54.0490410 0.0000000
## Spain-Canada 40.1666667 25.4517792 54.8815541 0.0000000
## UK-Canada 24.0762508 11.9965923 36.1559092 0.0000000
## Japan-India 3.4375000 -5.4438302 12.3188302 0.9558614
## Other Europe-India 7.6708861 2.1729077 13.1688645 0.0005378
## Singapore-India 22.3913043 14.4917426 30.2908661 0.0000000
## Spain-India 22.0000000 12.1551019 31.8448981 0.0000000
## UK-India 5.9095841 0.7800371 11.0391311 0.0107559
## Other Europe-Japan 4.2333861 -3.4876046 11.9543768 0.7438818
## Singapore-Japan 18.9538043 9.3731464 28.5344623 0.0000000
## Spain-Japan 18.5625000 7.3238191 29.8011809 0.0000121
## UK-Japan 2.4720841 -4.9910375 9.9352057 0.9829945
## Singapore-Other Europe 14.7204183 8.1524089 21.2884277 0.0000000
## Spain-Other Europe 14.3291139 5.5167538 23.1414740 0.0000181
## UK-Other Europe -1.7613020 -4.4160948 0.8934908 0.4999296
## Spain-Singapore -0.3913043 -10.8714284 10.0888197 1.0000000
## UK-Singapore -16.4817203 -22.7445658 -10.2188748 0.0000000
## UK-Spain -16.0904159 -24.6777424 -7.5030894 0.0000003
# Two-way ANOVA
# Effect of two categorical variables (Region and Product) and their
# interaction on the numeric variable Relative.Strength.
anova3 <- aov(
  formula = Relative.Strength ~ Region + Product + Region*Product,
  data = data1
)
summary(anova3)
## Df Sum Sq Mean Sq F value Pr(>F)
## Region 8 17554 2194.2 31.30 <0.0000000000000002 ***
## Product 6 10358 1726.3 24.62 <0.0000000000000002 ***
## Region:Product 6 9819 1636.6 23.34 <0.0000000000000002 ***
## Residuals 979 68642 70.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.tables(x = anova3, type = "means")
## Tables of means
## Grand mean
##
## 49.605
##
## Region
## Africa Americas Canada India Japan Other Europe Singapore Spain UK
## 45.56 46.45 25.83 44 47.44 51.67 66.39 66 49.91
## rep 93.00 104.00 6.00 35 16.00 158.00 23.00 12 553.00
##
## Product
## GTMSys Procsys LearnSys Finsys Lifesys Logissys ContactSys
## 50.27 48.66 46.8 54.53 47.3 38.21 58.7
## rep 463.00 133.00 126.0 117.00 112.0 29.00 20.0
##
## Region:Product
## Product
## Region GTMSys Procsys LearnSys Finsys Lifesys Logissys ContactSys
## Africa 45.84 45.00
## rep 62.00 0.00 31.00 0.00 0.00 0.00 0.00
## Americas 45.20 55.23
## rep 91.00 13.00 0.00 0.00 0.00 0.00 0.00
## Canada 29.00 10.00
## rep 0.00 5.00 0.00 0.00 1.00 0.00 0.00
## India 44.00
## rep 0.00 35.00 0.00 0.00 0.00 0.00 0.00
## Japan 58.67 33.00
## rep 9.00 7.00 0.00 0.00 0.00 0.00 0.00
## Other Europe 46.50 55.97 53.64
## rep 56.00 0.00 0.00 38.00 64.00 0.00 0.00
## Singapore 66.39
## rep 23.00 0.00 0.00 0.00 0.00 0.00 0.00
## Spain 66.00
## rep 12.00 0.00 0.00 0.00 0.00 0.00 0.00
## UK 52.74 47.79 46.37 55.14 42.06 38.52 59.00
## rep 210.00 73.00 95.00 79.00 47.00 29.00 20.00
# Pairwise comparisons for the Region term of the two-way model.
TukeyHSD(x = anova3, which = "Region")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Relative.Strength ~ Region + Product + Region * Product, data = data1)
##
## $Region
## diff lwr upr p adj
## Americas-Africa 0.8927833 -2.8222307 4.607797 0.9980708
## Canada-Africa -19.7258065 -30.6902344 -8.761378 0.0000010
## India-Africa -1.5591398 -6.7211038 3.602824 0.9906727
## Japan-Africa 1.8783602 -5.1669037 8.923624 0.9959997
## Other Europe-Africa 6.1117463 2.7096052 9.513887 0.0000011
## Singapore-Africa 20.8321646 14.7702602 26.894069 0.0000000
## Spain-Africa 20.4408602 12.4563545 28.425366 0.0000000
## UK-Africa 4.3504443 1.4330327 7.267856 0.0001397
## Canada-Americas -20.6185897 -31.5478188 -9.689361 0.0000002
## India-Americas -2.4519231 -7.5386941 2.634848 0.8564112
## Japan-Americas 0.9855769 -6.0047815 7.975935 0.9999637
## Other Europe-Americas 5.2189630 1.9320295 8.505896 0.0000329
## Singapore-Americas 19.9393813 13.9413772 25.937385 0.0000000
## Spain-Americas 19.5480769 11.6119760 27.484178 0.0000000
## UK-Americas 3.4576610 0.6754573 6.239865 0.0037877
## India-Canada 18.1666667 6.6648150 29.668518 0.0000374
## Japan-Canada 21.6041667 9.1429283 34.065405 0.0000032
## Other Europe-Canada 25.8375527 15.0106749 36.664431 0.0000000
## Singapore-Canada 40.5579710 28.6251003 52.490842 0.0000000
## Spain-Canada 40.1666667 27.1513276 53.182006 0.0000000
## UK-Canada 24.0762508 13.3917756 34.760726 0.0000000
## Japan-India 3.4375000 -4.4180492 11.293049 0.9122625
## Other Europe-India 7.6708861 2.8079163 12.533856 0.0000386
## Singapore-India 22.3913043 15.4041307 29.378478 0.0000000
## Spain-India 22.0000000 13.2921735 30.707826 0.0000000
## UK-India 5.9095841 1.3724924 10.446676 0.0018145
## Other Europe-Japan 4.2333861 -2.5958413 11.062613 0.5949985
## Singapore-Japan 18.9538043 10.4796987 27.427910 0.0000000
## Spain-Japan 18.5625000 8.6218706 28.503129 0.0000003
## UK-Japan 2.4720841 -4.1290576 9.073226 0.9636698
## Singapore-Other Europe 14.7204183 8.9110045 20.529832 0.0000000
## Spain-Other Europe 14.3291139 6.5345688 22.123659 0.0000005
## UK-Other Europe -1.7613020 -4.1094700 0.586866 0.3239332
## Spain-Singapore -0.3913043 -9.6609890 8.878380 1.0000000
## UK-Singapore -16.4817203 -22.0212161 -10.942224 0.0000000
## UK-Spain -16.0904159 -23.6859185 -8.494913 0.0000000
# Pairwise comparisons for the Product term of the two-way model.
TukeyHSD(x = anova3, which = "Product")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Relative.Strength ~ Region + Product + Region * Product, data = data1)
##
## $Product
## diff lwr upr p adj
## Procsys-GTMSys -1.6125114 -4.046347 0.8213237 0.4427451
## LearnSys-GTMSys -3.4763644 -5.962165 -0.9905642 0.0007706
## Finsys-GTMSys 4.2597518 1.699899 6.8196040 0.0000213
## Lifesys-GTMSys -2.9770925 -5.582159 -0.3720263 0.0134298
## Logissys-GTMSys -12.0612165 -16.796842 -7.3255905 0.0000000
## ContactSys-GTMSys 8.4215421 2.771491 14.0715937 0.0002379
## LearnSys-Procsys -1.8638530 -4.939405 1.2116993 0.5548244
## Finsys-Procsys 5.8722632 2.736555 9.0079710 0.0000008
## Lifesys-Procsys -1.3645811 -4.537307 1.8081448 0.8652576
## Logissys-Procsys -10.4487051 -15.518811 -5.3785993 0.0000000
## ContactSys-Procsys 10.0340535 4.100850 15.9672569 0.0000143
## Finsys-LearnSys 7.7361162 4.559906 10.9123266 0.0000000
## Lifesys-LearnSys 0.4992719 -2.713490 3.7120338 0.9992976
## Logissys-LearnSys -8.5848521 -13.680107 -3.4895972 0.0000157
## ContactSys-LearnSys 11.8979066 5.943198 17.8526149 0.0000001
## Lifesys-Finsys -7.2368442 -10.507239 -3.9664498 0.0000000
## Logissys-Finsys -16.3209682 -21.452758 -11.1891787 0.0000000
## ContactSys-Finsys 4.1617904 -1.824209 10.1477901 0.3812306
## Logissys-Lifesys -9.0841240 -14.238616 -3.9296319 0.0000049
## ContactSys-Lifesys 11.3986346 5.393161 17.4041086 0.0000006
## ContactSys-Logissys 20.4827586 13.292105 27.6734124 0.0000000
# Tables of means for the two-way model, printed again for reference.
model.tables(x = anova3, type = "means")
## Tables of means
## Grand mean
##
## 49.605
##
## Region
## Africa Americas Canada India Japan Other Europe Singapore Spain UK
## 45.56 46.45 25.83 44 47.44 51.67 66.39 66 49.91
## rep 93.00 104.00 6.00 35 16.00 158.00 23.00 12 553.00
##
## Product
## GTMSys Procsys LearnSys Finsys Lifesys Logissys ContactSys
## 50.27 48.66 46.8 54.53 47.3 38.21 58.7
## rep 463.00 133.00 126.0 117.00 112.0 29.00 20.0
##
## Region:Product
## Product
## Region GTMSys Procsys LearnSys Finsys Lifesys Logissys ContactSys
## Africa 45.84 45.00
## rep 62.00 0.00 31.00 0.00 0.00 0.00 0.00
## Americas 45.20 55.23
## rep 91.00 13.00 0.00 0.00 0.00 0.00 0.00
## Canada 29.00 10.00
## rep 0.00 5.00 0.00 0.00 1.00 0.00 0.00
## India 44.00
## rep 0.00 35.00 0.00 0.00 0.00 0.00 0.00
## Japan 58.67 33.00
## rep 9.00 7.00 0.00 0.00 0.00 0.00 0.00
## Other Europe 46.50 55.97 53.64
## rep 56.00 0.00 0.00 38.00 64.00 0.00 0.00
## Singapore 66.39
## rep 23.00 0.00 0.00 0.00 0.00 0.00 0.00
## Spain 66.00
## rep 12.00 0.00 0.00 0.00 0.00 0.00 0.00
## UK 52.74 47.79 46.37 55.14 42.06 38.52 59.00
## rep 210.00 73.00 95.00 79.00 47.00 29.00 20.00
# Pairwise comparisons for the interaction term. `which` must name the term
# exactly as it appears in the fitted model, "Region:Product" (see the
# summary(anova3) table). The earlier spelling "Region : Product", with spaces,
# matched no term and produced only a NaN warning and an empty `$<NA>` table.
TukeyHSD(anova3, which = "Region:Product")
## NOTE: re-run to regenerate the Region:Product comparison table. The output
## previously pasted here came from the mis-spelled term name and contained
## no comparisons.
# Interaction plot of mean Relative.Strength by Region, one trace per Product.
# `col` takes one colour per trace. Passing the row-level data1$Product vector
# would colour the traces by the product codes of the first few rows rather
# than giving each product its own colour, so use one colour index per level.
interaction.plot(
  data1$Region,
  data1$Product,
  data1$Relative.Strength,
  col = seq_len(nlevels(data1$Product))
)

#Question 7: Jack Williams, the CEO of the company, believed that the sales conversions are different
#for different geographical locations.
#Check the validity of Jack's belief using an appropriate hypothesis test.
#here, we have to check whether sales conversion (reporting status) varies with region. Both are categorical variables.
#so we use chi square test.
#Ho: No association between sales conversion and region, Ha: there is an association
# Region and win/loss status only.
data10 <- data1[, c("Region", "Reporting.status")]
# Contingency table of region by outcome.
table(data10)
## Reporting.status
## Region Lost Won
## Africa 38 55
## Americas 49 55
## Canada 2 4
## India 18 17
## Japan 6 10
## Other Europe 92 66
## Singapore 17 6
## Spain 11 1
## UK 286 267
# Row-wise proportions: share of leads lost / won within each region.
prop.table(x = table(data10), margin = 1)
## Reporting.status
## Region Lost Won
## Africa 0.40860215 0.59139785
## Americas 0.47115385 0.52884615
## Canada 0.33333333 0.66666667
## India 0.51428571 0.48571429
## Japan 0.37500000 0.62500000
## Other Europe 0.58227848 0.41772152
## Singapore 0.73913043 0.26086957
## Spain 0.91666667 0.08333333
## UK 0.51717902 0.48282098
# Chi-square test of association between outcome and region.
chisq.test(
  x = data10$Reporting.status,
  y = data10$Region
)
## Warning in chisq.test(data10$Reporting.status, data10$Region): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: data10$Reporting.status and data10$Region
## X-squared = 22.263, df = 8, p-value = 0.004452
# Question 8: Joe Danby, the chief financial officer believes that
#the sales conversions depend on the sales value. Use an appropriate hypothesis test to check the validity
# of this claim by making the following 3 groups
# Sales value less than 6 million dollars.
# Sales value between 6 and 8 million (both inclusive) dollars.
# More than 8 million dollars.
# Band each lead by sales value: below 6 is "Low", 6 to 8 inclusive is
# "Medium", above 8 is "High".
data11 <- data1 %>%
  select(sales_value, Reporting.status) %>%
  mutate(
    sales.value.category = case_when(
      sales_value < 6 ~ "Low",
      between(sales_value, 6, 8) ~ "Medium",
      sales_value > 8 ~ "High"
    )
  )
# Chi-square test of association between outcome and sales-value band.
chisq.test(
  x = data11$Reporting.status,
  y = data11$sales.value.category
)
##
## Pearson's Chi-squared test
##
## data: data11$Reporting.status and data11$sales.value.category
## X-squared = 1.377, df = 2, p-value = 0.5023
# Outcome and band only, for the tables below.
data12 <- data11[, c("Reporting.status", "sales.value.category")]
table(data12)
## sales.value.category
## Reporting.status High Low Medium
## Lost 251 86 182
## Won 248 69 164
# Column-wise percentages: share lost / won within each band.
100 * prop.table(x = table(data12), margin = 2)
## sales.value.category
## Reporting.status High Low Medium
## Lost 50.30060 55.48387 52.60116
## Won 49.69940 44.51613 47.39884