Raw Value

Overview

Table

# Cell counts of posneg (rows) by items (columns) for the full data set,
# then for the data_p and data_n subsets (each holds a single posneg level).
table(data$posneg, data$items)

##    
##      5  7 10
##   n 66 48 48
##   p 56 30 40

table(data_p$posneg, data_p$items)

##    
##      5  7 10
##   p 56 30 40

table(data_n$posneg, data_n$items)

##    
##      5  7 10
##   n 66 48 48

Histogram

# Base-R histograms of rate, one per items level (5, 7, 10), all reviews.
hist(data$rate[data$items == 5])

hist(data$rate[data$items == 7])

hist(data$rate[data$items == 10])

# Same split, restricted to posneg == "n".
hist(data$rate[data$posneg == "n" & data$items == 5])

hist(data$rate[data$posneg == "n" & data$items == 7])

hist(data$rate[data$posneg == "n" & data$items == 10])

# Same split, restricted to posneg == "p".
hist(data$rate[data$posneg == "p" & data$items == 5])

hist(data$rate[data$posneg == "p" & data$items == 7])

hist(data$rate[data$posneg == "p" & data$items == 10])

# Histogram of rate, shown overall and then faceted by posneg, by items,
# and by both. Only one histogram layer is added: stacking a default-binned
# geom_histogram() under a second one draws every bar twice and makes
# stat_bin() print its "bins = 30" message each time the plot is rendered.
p <- ggplot(data, aes(x = rate)) +
  geom_histogram(binwidth = 1)
p

p + facet_grid(posneg ~ .)

p + facet_grid(items ~ .)

p + facet_grid(items ~ posneg)

Boxplot

# Boxplots of rate per items level (items is converted to a factor for the
# x axis), shown overall and then faceted by posneg, by items, and by both.
p<-ggplot(data, aes(x=as.factor(items), y = rate)) + 
  geom_boxplot()
p

p + facet_grid(posneg~.)

p + facet_grid(items ~ .)

p + facet_grid(items ~ posneg)

Analysis - ANOVA on Positive review

Model assumption test

# n, mean and sd of rate per items level, for rows with posneg == "p".
data %>% 
  filter(posneg == "p") %>%
  group_by(as.factor(items)) %>%
  get_summary_stats(rate, type = "mean_sd")

## # A tibble: 3 x 5
##   `as.factor(items)` variable     n  mean    sd
##   <fct>              <chr>    <dbl> <dbl> <dbl>
## 1 5                  rate        56  4.11 0.679
## 2 7                  rate        30  6.13 0.9  
## 3 10                 rate        40  8.4  1.13

# Assumption checks for the model of rate on items, fitted on data_p.
ggboxplot(data_p, x="items", y="rate")

# Fit rate ~ items with items treated as a factor, then QQ-plot the residuals.
model_p  <- lm(rate ~ as.factor(items), data = data_p)
ggqqplot(residuals(model_p))

# QQ plot of rate within each items group.
ggqqplot(data_p, "rate", facet.by = "items")

plot(model_p, 1) ## Homogeneity of variance assumption

plot(model_p, 2) ## Normal Q-Q

plot(model_p, 4) ## Cook's distance

summary(model_p)

## 
## Call:
## lm(formula = rate ~ as.factor(items), data = data_p)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4000 -0.4000 -0.1071  0.8667  1.6000 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          4.1071     0.1195   34.36   <2e-16 ***
## as.factor(items)7    2.0262     0.2024   10.01   <2e-16 ***
## as.factor(items)10   4.2929     0.1852   23.18   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8945 on 123 degrees of freedom
## Multiple R-squared:  0.8142, Adjusted R-squared:  0.8112 
## F-statistic: 269.5 on 2 and 123 DF,  p-value: < 2.2e-16

Anova Test

# ANOVA of rate across items levels on data_p, stored and then printed.
res.aov_p <- data_p %>% rstatix::anova_test(rate ~ as.factor(items))

## Coefficient covariances computed by hccm()

res.aov_p

## ANOVA Table (type II tests)
## 
##             Effect DFn DFd       F        p p<.05   ges
## 1 as.factor(items)   2 123 269.491 1.11e-45     * 0.814

Post_hoc test

# Tukey HSD pairwise comparisons of rate between items levels on data_p.
pwc_p <- data_p %>% rstatix::tukey_hsd(rate ~ as.factor(items))
pwc_p

## # A tibble: 3 x 9
##   term  group1 group2 null.value estimate conf.low conf.high    p.adj
## * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>    <dbl>
## 1 as.f… 5      7               0     2.03     1.55      2.51 9.51e-14
## 2 as.f… 5      10              0     4.29     3.85      4.73 6.72e-14
## 3 as.f… 7      10              0     2.27     1.75      2.78 9.05e-14
## # … with 1 more variable: p.adj.signif <chr>

Analysis - ANOVA on Negative review

Model assumption test

# n, mean and sd of rate per items level, for rows with posneg == "n".
data %>% 
  filter(posneg == "n") %>%
  group_by(as.factor(items)) %>%
  get_summary_stats(rate, type = "mean_sd")

## # A tibble: 3 x 5
##   `as.factor(items)` variable     n  mean    sd
##   <fct>              <chr>    <dbl> <dbl> <dbl>
## 1 5                  rate        66  2.64  1.21
## 2 7                  rate        48  3.62  1.64
## 3 10                 rate        48  4.5   2.41

# Assumption checks for the model of rate on items, fitted on data_n.
ggboxplot(data_n, x="items", y="rate")

# Fit rate ~ items with items treated as a factor, then QQ-plot the residuals.
model_n  <- lm(rate ~ as.factor(items), data = data_n)
ggqqplot(residuals(model_n))

# QQ plot of rate within each items group.
ggqqplot(data_n, "rate", facet.by = "items")

plot(model_n, 1)## Homogeneity of variance assumption

plot(model_n, 2)## Normal Q-Q

plot(model_n, 4)## Cook's distance

summary(model_n)

## 
## Call:
## lm(formula = rate ~ as.factor(items), data = data_n)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.500 -1.500 -0.625  1.364  4.500 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.6364     0.2171  12.145  < 2e-16 ***
## as.factor(items)7    0.9886     0.3345   2.955   0.0036 ** 
## as.factor(items)10   1.8636     0.3345   5.571 1.06e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.764 on 159 degrees of freedom
## Multiple R-squared:  0.1653, Adjusted R-squared:  0.1548 
## F-statistic: 15.74 on 2 and 159 DF,  p-value: 5.785e-07

Anova Test

# ANOVA of rate across items levels on data_n, stored and then printed.
res.aov_n <- data_n %>% rstatix::anova_test(rate ~ as.factor(items))

## Coefficient covariances computed by hccm()

res.aov_n

## ANOVA Table (type II tests)
## 
##             Effect DFn DFd      F        p p<.05   ges
## 1 as.factor(items)   2 159 15.742 5.79e-07     * 0.165

Post_hoc test

# Tukey HSD pairwise comparisons of rate between items levels on data_n.
pwc_n <- data_n %>% rstatix::tukey_hsd(rate ~ as.factor(items))
pwc_n

## # A tibble: 3 x 9
##   term  group1 group2 null.value estimate conf.low conf.high   p.adj
## * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
## 1 as.f… 5      7               0    0.989   0.197       1.78 1.00e-2
## 2 as.f… 5      10              0    1.86    1.07        2.66 3.17e-7
## 3 as.f… 7      10              0    0.875   0.0233      1.73 4.25e-2
## # … with 1 more variable: p.adj.signif <chr>

Normalization (/ by 5,7,10)

Overview

Table

# Data overview: cell counts of posneg (rows) by items (columns) for the
# full data set, then for the data_p and data_n subsets.
table(data$posneg, data$items)

##    
##      5  7 10
##   n 66 48 48
##   p 56 30 40

table(data_p$posneg, data_p$items)

##    
##      5  7 10
##   p 56 30 40

table(data_n$posneg, data_n$items)

##    
##      5  7 10
##   n 66 48 48

Histogram

# Base-R histograms of rate_n, one per items level (5, 7, 10), all reviews.
hist(data$rate_n[data$items == 5])

hist(data$rate_n[data$items == 7])

hist(data$rate_n[data$items == 10])

# Same split, restricted to posneg == "n".
hist(data$rate_n[data$posneg == "n" & data$items == 5])

hist(data$rate_n[data$posneg == "n" & data$items == 7])

hist(data$rate_n[data$posneg == "n" & data$items == 10])

# Same split, restricted to posneg == "p".
hist(data$rate_n[data$posneg == "p" & data$items == 5])

hist(data$rate_n[data$posneg == "p" & data$items == 7])

hist(data$rate_n[data$posneg == "p" & data$items == 10])

# Histogram of rate_n, shown overall and then faceted by posneg, by items,
# and by both. Only one histogram layer is added: stacking a default-binned
# geom_histogram() under a second one draws every bar twice and makes
# stat_bin() print its "bins = 30" message each time the plot is rendered.
# The group means and model residuals reported for rate_n place it roughly
# within 0-1 (TODO confirm against the data), so binwidth = 1 would squeeze
# the whole distribution into one or two bars; 0.1 is used instead.
p <- ggplot(data, aes(x = rate_n)) +
  geom_histogram(binwidth = 0.1)
p

p + facet_grid(posneg ~ .)

p + facet_grid(items ~ .)

p + facet_grid(items ~ posneg)

Boxplot

# Boxplots of rate_n per items level (items is converted to a factor for the
# x axis), shown overall and then faceted by posneg, by items, and by both.
p<-ggplot(data, aes(x=as.factor(items), y = rate_n)) + 
  geom_boxplot()
p

p + facet_grid(posneg~.)

p + facet_grid(items ~ .)

p + facet_grid(items ~ posneg)

Analysis - ANOVA on Positive review

Model assumption test

# Analysis: positive reviews, model assumption checks.
# n, mean and sd of rate_n per items level, for rows with posneg == "p".
data %>% 
  filter(posneg == "p") %>%
  group_by(as.factor(items)) %>%
  get_summary_stats(rate_n, type = "mean_sd")

## # A tibble: 3 x 5
##   `as.factor(items)` variable     n  mean    sd
##   <fct>              <chr>    <dbl> <dbl> <dbl>
## 1 5                  rate_n      56 0.821 0.136
## 2 7                  rate_n      30 0.876 0.129
## 3 10                 rate_n      40 0.84  0.113

# Assumption checks for the model of rate_n on items, fitted on data_p.
ggboxplot(data_p, x="items", y="rate_n")

# Fit rate_n ~ items with items treated as a factor, then QQ-plot the residuals.
model_p  <- lm(rate_n ~ as.factor(items), data = data_p)
ggqqplot(residuals(model_p))

# QQ plot of rate_n within each items group.
ggqqplot(data_p, "rate_n", facet.by = "items")

plot(model_p, 1) ## Homogeneity of variance assumption

plot(model_p, 2) ## Normal Q-Q

plot(model_p, 4) ## Cook's distance

summary(model_p)

## 
## Call:
## lm(formula = rate_n ~ as.factor(items), data = data_p)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30476 -0.04000 -0.02143  0.12381  0.17857 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.82143    0.01699  48.337   <2e-16 ***
## as.factor(items)7   0.05476    0.02877   1.903   0.0593 .  
## as.factor(items)10  0.01857    0.02633   0.705   0.4819    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1272 on 123 degrees of freedom
## Multiple R-squared:  0.02861,    Adjusted R-squared:  0.01282 
## F-statistic: 1.811 on 2 and 123 DF,  p-value: 0.1678

Anova Test

# ANOVA of rate_n across items levels on data_p, stored and then printed.
res.aov_p <- data_p %>% rstatix::anova_test(rate_n ~ as.factor(items))

## Coefficient covariances computed by hccm()

res.aov_p

## ANOVA Table (type II tests)
## 
##             Effect DFn DFd     F     p p<.05   ges
## 1 as.factor(items)   2 123 1.811 0.168       0.029

Post_hoc test

# Post-hoc test: Tukey HSD pairwise comparisons of rate_n between items
# levels on data_p.
pwc_p <- data_p %>% rstatix::tukey_hsd(rate_n ~ as.factor(items))
pwc_p

## # A tibble: 3 x 9
##   term   group1 group2 null.value estimate conf.low conf.high p.adj p.adj.signif
## * <chr>  <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl> <dbl> <chr>       
## 1 as.fa… 5      7               0   0.0548  -0.0135    0.123  0.142 ns          
## 2 as.fa… 5      10              0   0.0186  -0.0439    0.0810 0.761 ns          
## 3 as.fa… 7      10              0  -0.0362  -0.109     0.0367 0.468 ns

Analysis - ANOVA on Negative review

Model assumption test

# Analysis: negative reviews.
# n, mean and sd of rate_n per items level, for rows with posneg == "n".
data %>% 
  filter(posneg == "n") %>%
  group_by(as.factor(items)) %>%
  get_summary_stats(rate_n, type = "mean_sd")

## # A tibble: 3 x 5
##   `as.factor(items)` variable     n  mean    sd
##   <fct>              <chr>    <dbl> <dbl> <dbl>
## 1 5                  rate_n      66 0.527 0.242
## 2 7                  rate_n      48 0.518 0.235
## 3 10                 rate_n      48 0.45  0.241

# Assumption checks for the model of rate_n on items, fitted on data_n.
# The boxplot is drawn from data_n and rate_n so that it shows the same subset
# and response that model_n below is fitted on (plotting data / rate here
# would show all reviews on the raw scale).
ggboxplot(data_n, x = "items", y = "rate_n")

# Fit rate_n ~ items with items treated as a factor, then QQ-plot the residuals.
model_n <- lm(rate_n ~ as.factor(items), data = data_n)
ggqqplot(residuals(model_n))

# QQ plot of rate_n within each items group.
ggqqplot(data_n, "rate_n", facet.by = "items")

plot(model_n, 1) # Homogeneity of variance assumption

plot(model_n, 2) # Normal Q-Q

plot(model_n, 4) # Cook's distance

summary(model_n)

## 
## Call:
## lm(formula = rate_n ~ as.factor(items), data = data_n)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37500 -0.15000 -0.08929  0.19643  0.48214 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.527273   0.029493  17.878   <2e-16 ***
## as.factor(items)7  -0.009416   0.045451  -0.207   0.8362    
## as.factor(items)10 -0.077273   0.045451  -1.700   0.0911 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2396 on 159 degrees of freedom
## Multiple R-squared:  0.01976,    Adjusted R-squared:  0.007429 
## F-statistic: 1.602 on 2 and 159 DF,  p-value: 0.2046

Anova Test

# ANOVA of rate_n across items levels on data_n, stored and then printed.
res.aov_n <- data_n %>% rstatix::anova_test(rate_n ~ as.factor(items))

## Coefficient covariances computed by hccm()

res.aov_n

## ANOVA Table (type II tests)
## 
##             Effect DFn DFd     F     p p<.05  ges
## 1 as.factor(items)   2 159 1.602 0.205       0.02

## Post_hoc test

Post_hoc test

# Tukey HSD pairwise comparisons of rate_n between items levels on data_n.
pwc_n <- data_n %>% rstatix::tukey_hsd(rate_n~ as.factor(items))
pwc_n

## # A tibble: 3 x 9
##   term   group1 group2 null.value estimate conf.low conf.high p.adj p.adj.signif
## * <chr>  <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl> <dbl> <chr>       
## 1 as.fa… 5      7               0 -0.00942   -0.117    0.0981 0.977 ns          
## 2 as.fa… 5      10              0 -0.0773    -0.185    0.0303 0.208 ns          
## 3 as.fa… 7      10              0 -0.0679    -0.184    0.0479 0.35  ns

Standardization ((value - mean(value)) / sd)

Overview

Table

# Data overview: cell counts of posneg (rows) by items (columns) for the
# full data set, then for the data_p and data_n subsets.
table(data$posneg, data$items)

##    
##      5  7 10
##   n 66 48 48
##   p 56 30 40

table(data_p$posneg, data_p$items)

##    
##      5  7 10
##   p 56 30 40

table(data_n$posneg, data_n$items)

##    
##      5  7 10
##   n 66 48 48

Histogram

# Base-R histograms of rate_z, one per items level (5, 7, 10), all reviews.
hist(data$rate_z[data$items == 5])

hist(data$rate_z[data$items == 7])

hist(data$rate_z[data$items == 10])

# Same split, restricted to posneg == "n".
hist(data$rate_z[data$posneg == "n" & data$items == 5])

hist(data$rate_z[data$posneg == "n" & data$items == 7])

hist(data$rate_z[data$posneg == "n" & data$items == 10])

# Same split, restricted to posneg == "p".
hist(data$rate_z[data$posneg == "p" & data$items == 5])

hist(data$rate_z[data$posneg == "p" & data$items == 7])

hist(data$rate_z[data$posneg == "p" & data$items == 10])

# Histogram of rate_z, shown overall and then faceted by posneg, by items,
# and by both. Only one histogram layer is added: stacking a default-binned
# geom_histogram() under a second one draws every bar twice and makes
# stat_bin() print its "bins = 30" message each time the plot is rendered.
# rate_z is presumably on a z-score scale (see the section heading), so half
# a standard deviation per bin is used rather than a full unit.
# NOTE(review): adjust binwidth if a different resolution is preferred.
p <- ggplot(data, aes(x = rate_z)) +
  geom_histogram(binwidth = 0.5)
p

p + facet_grid(posneg ~ .)

p + facet_grid(items ~ .)

p + facet_grid(items ~ posneg)

Boxplot

# Boxplots of rate_z per items level (items is converted to a factor for the
# x axis), shown overall and then faceted by posneg, by items, and by both.
p<-ggplot(data, aes(x=as.factor(items), y = rate_z)) + 
  geom_boxplot()
p

p + facet_grid(posneg~.)

p + facet_grid(items ~ .)

p + facet_grid(items ~ posneg)

Analysis - ANOVA on Positive review

Model assumption test

# Analysis: positive reviews, model assumption checks.
# n, mean and sd of rate_z per items level, for rows with posneg == "p".
data %>% 
  filter(posneg == "p") %>%
  group_by(as.factor(items)) %>%
  get_summary_stats(rate_z, type = "mean_sd")

## # A tibble: 3 x 5
##   `as.factor(items)` variable     n   mean    sd
##   <fct>              <chr>    <dbl>  <dbl> <dbl>
## 1 5                  rate_z      56 -0.196 0.292
## 2 7                  rate_z      30  0.675 0.386
## 3 10                 rate_z      40  1.65  0.485

# Assumption checks for the model of rate_z on items, fitted on data_p.
ggboxplot(data_p, x="items", y="rate_z")

# Fit rate_z ~ items with items treated as a factor, then QQ-plot the residuals.
model_p  <- lm(rate_z ~ as.factor(items), data = data_p)
ggqqplot(residuals(model_p))

# QQ plot of rate_z within each items group.
ggqqplot(data_p, "rate_z", facet.by = "items")

plot(model_p, 1) ## Homogeneity of variance assumption

plot(model_p, 2) ## Normal Q-Q

plot(model_p, 4) ## Cook's distance

summary(model_p)

## 
## Call:
## lm(formula = rate_z ~ as.factor(items), data = data_p)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.03111 -0.17185 -0.04603  0.37234  0.68741 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.19563    0.05136  -3.809 0.000219 ***
## as.factor(items)7   0.87051    0.08695  10.011  < 2e-16 ***
## as.factor(items)10  1.84433    0.07956  23.181  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3843 on 123 degrees of freedom
## Multiple R-squared:  0.8142, Adjusted R-squared:  0.8112 
## F-statistic: 269.5 on 2 and 123 DF,  p-value: < 2.2e-16

Anova Test

# ANOVA of rate_z across items levels on data_p, stored and then printed.
res.aov_p <- data_p %>% rstatix::anova_test(rate_z ~ as.factor(items))

## Coefficient covariances computed by hccm()

res.aov_p

## ANOVA Table (type II tests)
## 
##             Effect DFn DFd       F        p p<.05   ges
## 1 as.factor(items)   2 123 269.491 1.11e-45     * 0.814

Post_hoc test

# Post-hoc test: Tukey HSD pairwise comparisons of rate_z between items
# levels on data_p.
pwc_p <- data_p %>% rstatix::tukey_hsd(rate_z ~ as.factor(items))
pwc_p

## # A tibble: 3 x 9
##   term  group1 group2 null.value estimate conf.low conf.high    p.adj
## * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>    <dbl>
## 1 as.f… 5      7               0    0.871    0.664      1.08 9.51e-14
## 2 as.f… 5      10              0    1.84     1.66       2.03 6.72e-14
## 3 as.f… 7      10              0    0.974    0.754      1.19 9.05e-14
## # … with 1 more variable: p.adj.signif <chr>

Analysis - ANOVA on Negative review

Model assumption test

# Analysis: negative reviews.
# n, mean and sd of rate_z per items level, for rows with posneg == "n".
data %>% 
  filter(posneg == "n") %>%
  group_by(as.factor(items)) %>%
  get_summary_stats(rate_z, type = "mean_sd")

## # A tibble: 3 x 5
##   `as.factor(items)` variable     n   mean    sd
##   <fct>              <chr>    <dbl>  <dbl> <dbl>
## 1 5                  rate_z      66 -0.828 0.52 
## 2 7                  rate_z      48 -0.403 0.707
## 3 10                 rate_z      48 -0.027 1.03

# Assumption checks for the model of rate_z on items, fitted on data_n.
# The boxplot is drawn from data_n and rate_z so that it shows the same subset
# and response that model_n below is fitted on (plotting data / rate here
# would show all reviews on the raw scale).
ggboxplot(data_n, x = "items", y = "rate_z")

# Fit rate_z ~ items with items treated as a factor, then QQ-plot the residuals.
model_n <- lm(rate_z ~ as.factor(items), data = data_n)
ggqqplot(residuals(model_n))

# QQ plot of rate_z within each items group.
ggqqplot(data_n, "rate_z", facet.by = "items")

plot(model_n, 1) # Homogeneity of variance assumption

plot(model_n, 2) # Normal Q-Q

plot(model_n, 4) # Cook's distance

summary(model_n)

## 
## Call:
## lm(formula = rate_z ~ as.factor(items), data = data_n)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5037 -0.6444 -0.2685  0.5859  1.9333 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.82752    0.09326  -8.873 1.41e-15 ***
## as.factor(items)7   0.42475    0.14373   2.955   0.0036 ** 
## as.factor(items)10  0.80067    0.14373   5.571 1.06e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7577 on 159 degrees of freedom
## Multiple R-squared:  0.1653, Adjusted R-squared:  0.1548 
## F-statistic: 15.74 on 2 and 159 DF,  p-value: 5.785e-07

Anova Test

# ANOVA of rate_z across items levels on data_n, stored and then printed.
res.aov_n <- data_n %>% rstatix::anova_test(rate_z ~ as.factor(items))

## Coefficient covariances computed by hccm()

res.aov_n

## ANOVA Table (type II tests)
## 
##             Effect DFn DFd      F        p p<.05   ges
## 1 as.factor(items)   2 159 15.742 5.79e-07     * 0.165

Post_hoc test

# Post-hoc test: Tukey HSD pairwise comparisons of rate_z between items
# levels on data_n.
pwc_n <- data_n %>% rstatix::tukey_hsd(rate_z~ as.factor(items))
pwc_n

## # A tibble: 3 x 9
##   term  group1 group2 null.value estimate conf.low conf.high   p.adj
## * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
## 1 as.f… 5      7               0    0.425   0.0847     0.765 1.00e-2
## 2 as.f… 5      10              0    0.801   0.461      1.14  3.17e-7
## 3 as.f… 7      10              0    0.376   0.0100     0.742 4.25e-2
## # … with 1 more variable: p.adj.signif <chr>

Hotel Review AB test V2 (normalization, standardization 추가)

Chungil Chae

2/2/2021

Raw Value

Overview

Table

Histogram

Boxplot

Analysis - ANOVA on Positive review

Model assumption test

Anova Test

Post_hoc test

Analysis - ANOVA on Negative review

Model assumption test

Anova Test

Post_hoc test

Normalization (/ by 5,7,10)

Overview

Table

Histogram

Boxplot

Analysis - ANOVA on Positive review

Model assumption test

Anova Test

Post_hoc test

Analysis - ANOVA on Negative review

Model assumption test

Anova Test

Post_hoc test

Standardization ((value - mean(value)) / sd)

Overview

Table

Histogram

Boxplot

Analysis - ANOVA on Positive review

Model assumption test

Anova Test

Post_hoc test

Analysis - ANOVA on Negative review

Model assumption test

Anova Test

Post_hoc test