library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the customer data from CSV; readr infers the column types from the file.
Customer <- read_csv(file = "~/Downloads/Customer.csv")
## Parsed with column specification:
## cols(
## agegroup = col_double(),
## Satisfaction = col_double(),
## recommend = col_double()
## )
# Preview the first six rows as a quick check of the import.
head(x = Customer, n = 6L)
## # A tibble: 6 x 3
## agegroup Satisfaction recommend
## <dbl> <dbl> <dbl>
## 1 1 1 9
## 2 1 1 9
## 3 2 1 10
## 4 2 1 9
## 5 2 1 10
## 6 2 2 4
# Recode the numeric codes into labels and keep only rows with a known age
# group.
#   agegroup:     1 -> "Children Under age 7", 2 -> "Children Over age 7",
#                 anything else -> NA
#   Satisfaction: 1 -> "Satisfied", 2 -> "Dissatisfied", anything else -> NA
#   recommend:    values above 10 become NA (presumably out-of-scale codes --
#                 TODO confirm against the codebook)
# case_when() replaces the nested ifelse() calls: it reads as a lookup table
# and always yields a character column, even when no row matches a code.
Customer <- Customer %>%
  mutate(
    agegroup = case_when(
      agegroup == 1 ~ "Children Under age 7",
      agegroup == 2 ~ "Children Over age 7",
      TRUE ~ NA_character_
    ),
    Satisfaction = case_when(
      Satisfaction == 1 ~ "Satisfied",
      Satisfaction == 2 ~ "Dissatisfied",
      TRUE ~ NA_character_
    ),
    # which() skips NA comparisons, so scores that are already NA stay NA.
    recommend = replace(recommend, which(recommend > 10), NA)
  ) %>%
  select(recommend, Satisfaction, agegroup) %>%
  # Rows whose agegroup was not coded 1 or 2 are NA at this point; drop them.
  filter(agegroup %in% c("Children Under age 7", "Children Over age 7"))
# Share of rows in each age group, rounded to two decimals.
round(prop.table(table(Customer$agegroup)), digits = 2)
##
## Children Over age 7 Children Under age 7
## 0.74 0.26
# Share of rows at each satisfaction level, rounded to two decimals.
round(prop.table(table(Customer$Satisfaction)), digits = 2)
##
## Dissatisfied Satisfied
## 0.08 0.92
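# Expected cell proportions if satisfaction were independent of age group
# (age-group share * satisfaction share):
# Children Over age 7:  .74 * .08 = .06 Dissatisfied; .74 * .92 = .68 Satisfied
# Children Under age 7: .26 * .08 = .02 Dissatisfied; .26 * .92 = .24 Satisfied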
# Cross-tabulate age group (rows) by satisfaction (columns) and convert each
# row to proportions, so every age group sums to 1.
prop.table(table(Customer$agegroup, Customer$Satisfaction), margin = 1)
##
## Dissatisfied Satisfied
## Children Over age 7 0.09701493 0.90298507
## Children Under age 7 0.04519774 0.95480226
# Chi-squared test of association between satisfaction and age group.
chisq.test(x = Customer$Satisfaction, y = Customer$agegroup)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: Customer$Satisfaction and Customer$agegroup
## X-squared = 8.4154, df = 1, p-value = 0.003721
# Heavily penalise scientific notation so later results print in fixed form.
options(scipen = 9999)
# Mean recommend score within each age group, ignoring missing scores.
summarise(
  group_by(Customer, agegroup),
  recommend = mean(recommend, na.rm = TRUE)
)
## # A tibble: 2 x 2
## agegroup recommend
## <chr> <dbl>
## 1 Children Over age 7 8.90
## 2 Children Under age 7 9.17
# Histogram of recommend scores, filled by age group and split into one facet
# per age group.
Customer %>%
  filter(agegroup %in% c("Children Under age 7", "Children Over age 7")) %>%
  ggplot(mapping = aes(x = recommend, fill = agegroup)) +
  geom_histogram() +
  facet_wrap(vars(agegroup))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 33 rows containing non-finite values (stat_bin).
# Simulate the sampling distribution of the mean for a vector of scores.
#
# Arguments:
#   values       numeric vector of scores; NAs are dropped before sampling
#   n_draws      number of samples to draw
#   sample_size  number of scores per sample (drawn without replacement)
#
# Returns a data.frame with a single column, `mean`, holding one sample mean
# per draw. Stops with an error if fewer than `sample_size` non-missing
# scores are available.
sample_mean_distribution <- function(values,
                                     n_draws = 10000,
                                     sample_size = 40) {
  # Drop NAs up front: drawing them and then averaging with na.rm = TRUE
  # would base each mean on fewer than `sample_size` scores, and on a
  # different number of scores from draw to draw.
  observed <- values[!is.na(values)]
  stopifnot(length(observed) >= sample_size)
  data.frame(
    mean = replicate(n_draws, mean(sample(observed, sample_size)))
  )
}

# Fix the RNG seed so the simulated distributions are reproducible.
set.seed(1234)

# Sample means of the recommend score for children under age 7.
Customer_under <- Customer %>%
  filter(agegroup == "Children Under age 7") %>%
  pull(recommend) %>%
  sample_mean_distribution()

# Sample means of the recommend score for children over age 7.
Customer_over <- Customer %>%
  filter(agegroup == "Children Over age 7") %>%
  pull(recommend) %>%
  sample_mean_distribution()
# Overlay the two sets of simulated sample means on one histogram:
# lavender for the under-7 group, pink for the over-7 group.
ggplot() +
  geom_histogram(
    mapping = aes(x = mean),
    data = Customer_under,
    fill = "lavender"
  ) +
  geom_histogram(
    mapping = aes(x = mean),
    data = Customer_over,
    fill = "pink"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Overall mean recommend score across all retained rows, ignoring NAs.
summarise(Customer, recommend = mean(recommend, na.rm = TRUE))
## # A tibble: 1 x 1
## recommend
## <dbl>
## 1 8.97
# Two-sample t-test comparing the mean recommend score between the two age
# groups (t.test() defaults to the unequal-variance Welch form).
t.test(recommend ~ agegroup, data = Customer)
##
## Welch Two Sample t-test
##
## data: recommend by agegroup
## t = -3.4958, df = 901.74, p-value = 0.0004956
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4157539 -0.1167816
## sample estimates:
## mean in group Children Over age 7 mean in group Children Under age 7
## 8.904187 9.170455
# Keep numbers printing in fixed rather than scientific notation.
options(scipen = 9999)