# Cell counts of posneg ("n" / "p") by `items` level (5, 7, 10) for the full
# data and for the data_p / data_n subsets. The recorded output shows that
# data_p contains only "p" rows and data_n only "n" rows, and that the design
# is unbalanced.
table(data$posneg, data$items)
##
## 5 7 10
## n 66 48 48
## p 56 30 40
table(data_p$posneg, data_p$items)
##
## 5 7 10
## p 56 30 40
table(data_n$posneg, data_n$items)
##
## 5 7 10
## n 66 48 48
# Base-graphics histograms of `rate`: one per `items` level (5, 7, 10) over
# all rows, then the same three levels restricted to posneg == "n" and to
# posneg == "p". The calls are kept spelled out because hist() derives the
# plot title and axis label from the argument expression.
hist(data$rate[data$items == 5])
hist(data$rate[data$items == 7])
hist(data$rate[data$items == 10])
hist(data$rate[data$posneg == "n" & data$items == 5])
hist(data$rate[data$posneg == "n" & data$items == 7])
hist(data$rate[data$posneg == "n" & data$items == 10])
hist(data$rate[data$posneg == "p" & data$items == 5])
hist(data$rate[data$posneg == "p" & data$items == 7])
hist(data$rate[data$posneg == "p" & data$items == 10])
# Histogram of `rate`, pooled and then faceted by posneg, by `items`, and by
# both. Exactly one histogram layer with an explicit binwidth is used: adding
# a second, default geom_histogram() layer would be overplotted by this one
# and would emit the "`stat_bin()` using `bins = 30`" message on every print.
p <- ggplot(data, aes(x = rate)) +
  geom_histogram(binwidth = 1)
p
p + facet_grid(posneg ~ .)
p + facet_grid(items ~ .)
p + facet_grid(items ~ posneg)
# Box plots of `rate` for each `items` level: pooled first, then split into
# facet rows by posneg, facet rows by `items`, and an items-by-posneg grid.
p <- ggplot(data = data, mapping = aes(x = as.factor(items), y = rate)) +
  geom_boxplot()
p
p + facet_grid(rows = vars(posneg))
p + facet_grid(rows = vars(items))
p + facet_grid(rows = vars(items), cols = vars(posneg))
# n, mean and SD of `rate` within each `items` level, positive ("p") rows only.
filter(data, posneg == "p") %>%
  group_by(as.factor(items)) %>%
  get_summary_stats(rate, type = "mean_sd")
## # A tibble: 3 x 5
## `as.factor(items)` variable n mean sd
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 5 rate 56 4.11 0.679
## 2 7 rate 30 6.13 0.9
## 3 10 rate 40 8.4 1.13
# Positive rows: box plot, then a linear model of `rate` on `items` (treated
# as a factor), followed by residual diagnostics and the model summary.
ggboxplot(data = data_p, x = "items", y = "rate")
model_p <- lm(formula = rate ~ as.factor(items), data = data_p)
ggqqplot(data = residuals(model_p))
ggqqplot(data = data_p, x = "rate", facet.by = "items")
plot(model_p, which = 1) # residuals vs fitted: homogeneity of variance
plot(model_p, which = 2) # normal Q-Q of the residuals
plot(model_p, which = 4) # Cook's distance
summary(model_p)
##
## Call:
## lm(formula = rate ~ as.factor(items), data = data_p)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4000 -0.4000 -0.1071 0.8667 1.6000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.1071 0.1195 34.36 <2e-16 ***
## as.factor(items)7 2.0262 0.2024 10.01 <2e-16 ***
## as.factor(items)10 4.2929 0.1852 23.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8945 on 123 degrees of freedom
## Multiple R-squared: 0.8142, Adjusted R-squared: 0.8112
## F-statistic: 269.5 on 2 and 123 DF, p-value: < 2.2e-16
# One-way ANOVA of `rate` on `items` (as a factor) for the positive rows,
# printed, followed by Tukey HSD pairwise comparisons between `items` levels.
res.aov_p <- rstatix::anova_test(data_p, rate ~ as.factor(items))
## Coefficient covariances computed by hccm()
res.aov_p
## ANOVA Table (type II tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 as.factor(items) 2 123 269.491 1.11e-45 * 0.814
pwc_p <- rstatix::tukey_hsd(data_p, rate ~ as.factor(items))
pwc_p
## # A tibble: 3 x 9
## term group1 group2 null.value estimate conf.low conf.high p.adj
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 as.f… 5 7 0 2.03 1.55 2.51 9.51e-14
## 2 as.f… 5 10 0 4.29 3.85 4.73 6.72e-14
## 3 as.f… 7 10 0 2.27 1.75 2.78 9.05e-14
## # … with 1 more variable: p.adj.signif <chr>
# n, mean and SD of `rate` within each `items` level, negative ("n") rows only.
filter(data, posneg == "n") %>%
  group_by(as.factor(items)) %>%
  get_summary_stats(rate, type = "mean_sd")
## # A tibble: 3 x 5
## `as.factor(items)` variable n mean sd
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 5 rate 66 2.64 1.21
## 2 7 rate 48 3.62 1.64
## 3 10 rate 48 4.5 2.41
# Negative rows: box plot, then a linear model of `rate` on `items` (treated
# as a factor), followed by residual diagnostics and the model summary.
ggboxplot(data = data_n, x = "items", y = "rate")
model_n <- lm(formula = rate ~ as.factor(items), data = data_n)
ggqqplot(data = residuals(model_n))
ggqqplot(data = data_n, x = "rate", facet.by = "items")
plot(model_n, which = 1) # residuals vs fitted: homogeneity of variance
plot(model_n, which = 2) # normal Q-Q of the residuals
plot(model_n, which = 4) # Cook's distance
summary(model_n)
##
## Call:
## lm(formula = rate ~ as.factor(items), data = data_n)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.500 -1.500 -0.625 1.364 4.500
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6364 0.2171 12.145 < 2e-16 ***
## as.factor(items)7 0.9886 0.3345 2.955 0.0036 **
## as.factor(items)10 1.8636 0.3345 5.571 1.06e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.764 on 159 degrees of freedom
## Multiple R-squared: 0.1653, Adjusted R-squared: 0.1548
## F-statistic: 15.74 on 2 and 159 DF, p-value: 5.785e-07
# One-way ANOVA of `rate` on `items` (as a factor) for the negative rows,
# printed, followed by Tukey HSD pairwise comparisons between `items` levels.
res.aov_n <- rstatix::anova_test(data_n, rate ~ as.factor(items))
## Coefficient covariances computed by hccm()
res.aov_n
## ANOVA Table (type II tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 as.factor(items) 2 159 15.742 5.79e-07 * 0.165
pwc_n <- rstatix::tukey_hsd(data_n, rate ~ as.factor(items))
pwc_n
## # A tibble: 3 x 9
## term group1 group2 null.value estimate conf.low conf.high p.adj
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 as.f… 5 7 0 0.989 0.197 1.78 1.00e-2
## 2 as.f… 5 10 0 1.86 1.07 2.66 3.17e-7
## 3 as.f… 7 10 0 0.875 0.0233 1.73 4.25e-2
## # … with 1 more variable: p.adj.signif <chr>
# Data Overview
# Cell counts of posneg ("n" / "p") by `items` level (5, 7, 10) for the full
# data and for the data_p / data_n subsets. The counts do not involve the
# response column, so they are the same whichever rate variable is analysed.
table(data$posneg, data$items)
##
## 5 7 10
## n 66 48 48
## p 56 30 40
table(data_p$posneg, data_p$items)
##
## 5 7 10
## p 56 30 40
table(data_n$posneg, data_n$items)
##
## 5 7 10
## n 66 48 48
## Histogram
# Base-graphics histograms of `rate_n`: one per `items` level (5, 7, 10) over
# all rows, then the same three levels restricted to posneg == "n" and to
# posneg == "p".
# NOTE(review): the recorded group means of rate_n equal the rate means
# divided by `items`, so rate_n looks like rate / items; confirm where the
# column is created.
hist(data$rate_n[data$items == 5])
hist(data$rate_n[data$items == 7])
hist(data$rate_n[data$items == 10])
hist(data$rate_n[data$posneg == "n" & data$items == 5])
hist(data$rate_n[data$posneg == "n" & data$items == 7])
hist(data$rate_n[data$posneg == "n" & data$items == 10])
hist(data$rate_n[data$posneg == "p" & data$items == 5])
hist(data$rate_n[data$posneg == "p" & data$items == 7])
hist(data$rate_n[data$posneg == "p" & data$items == 10])
# Histogram of `rate_n`, pooled and then faceted by posneg, by `items`, and by
# both. Exactly one histogram layer is used: a second, default
# geom_histogram() layer would overlap this one and emit the
# "`stat_bin()` using `bins = 30`" message on every print.
# The bin width is 0.1 rather than 1: the group means and model residuals
# recorded for rate_n in this script all lie within 0-1, so one-unit bins
# would collapse the whole distribution into one or two bars.
p <- ggplot(data, aes(x = rate_n)) +
  geom_histogram(binwidth = 0.1)
p
p + facet_grid(posneg ~ .)
p + facet_grid(items ~ .)
p + facet_grid(items ~ posneg)
## Boxplot
# Box plots of `rate_n` for each `items` level: pooled first, then split into
# facet rows by posneg, facet rows by `items`, and an items-by-posneg grid.
p <- ggplot(data = data, mapping = aes(x = as.factor(items), y = rate_n)) +
  geom_boxplot()
p
p + facet_grid(rows = vars(posneg))
p + facet_grid(rows = vars(items))
p + facet_grid(rows = vars(items), cols = vars(posneg))
# Analysis: Positive Review
## Model assumption test
# n, mean and SD of `rate_n` within each `items` level, positive ("p") rows
# only.
filter(data, posneg == "p") %>%
  group_by(as.factor(items)) %>%
  get_summary_stats(rate_n, type = "mean_sd")
## # A tibble: 3 x 5
## `as.factor(items)` variable n mean sd
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 5 rate_n 56 0.821 0.136
## 2 7 rate_n 30 0.876 0.129
## 3 10 rate_n 40 0.84 0.113
# Positive rows: box plot, then a linear model of `rate_n` on `items` (treated
# as a factor), followed by residual diagnostics and the model summary.
ggboxplot(data = data_p, x = "items", y = "rate_n")
model_p <- lm(formula = rate_n ~ as.factor(items), data = data_p)
ggqqplot(data = residuals(model_p))
ggqqplot(data = data_p, x = "rate_n", facet.by = "items")
plot(model_p, which = 1) # residuals vs fitted: homogeneity of variance
plot(model_p, which = 2) # normal Q-Q of the residuals
plot(model_p, which = 4) # Cook's distance
summary(model_p)
##
## Call:
## lm(formula = rate_n ~ as.factor(items), data = data_p)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30476 -0.04000 -0.02143 0.12381 0.17857
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.82143 0.01699 48.337 <2e-16 ***
## as.factor(items)7 0.05476 0.02877 1.903 0.0593 .
## as.factor(items)10 0.01857 0.02633 0.705 0.4819
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1272 on 123 degrees of freedom
## Multiple R-squared: 0.02861, Adjusted R-squared: 0.01282
## F-statistic: 1.811 on 2 and 123 DF, p-value: 0.1678
## Anova Test
# One-way ANOVA of `rate_n` on `items` (as a factor) for the positive rows.
res.aov_p <- rstatix::anova_test(data_p, rate_n ~ as.factor(items))
## Coefficient covariances computed by hccm()
res.aov_p
## ANOVA Table (type II tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 as.factor(items) 2 123 1.811 0.168 0.029
## Post_hoc test
# Tukey HSD pairwise comparisons between `items` levels.
pwc_p <- rstatix::tukey_hsd(data_p, rate_n ~ as.factor(items))
pwc_p
## # A tibble: 3 x 9
## term group1 group2 null.value estimate conf.low conf.high p.adj p.adj.signif
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 as.fa… 5 7 0 0.0548 -0.0135 0.123 0.142 ns
## 2 as.fa… 5 10 0 0.0186 -0.0439 0.0810 0.761 ns
## 3 as.fa… 7 10 0 -0.0362 -0.109 0.0367 0.468 ns
# Analysis: Negative Review
# n, mean and SD of `rate_n` within each `items` level, negative ("n") rows
# only.
filter(data, posneg == "n") %>%
  group_by(as.factor(items)) %>%
  get_summary_stats(rate_n, type = "mean_sd")
## # A tibble: 3 x 5
## `as.factor(items)` variable n mean sd
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 5 rate_n 66 0.527 0.242
## 2 7 rate_n 48 0.518 0.235
## 3 10 rate_n 48 0.45 0.241
# Negative rows: box plot, then a linear model of `rate_n` on `items` (treated
# as a factor), followed by residual diagnostics and the model summary.
# The box plot uses data_n and rate_n so that it shows the same subset and
# response that model_n is fitted to (mirroring the positive-row analysis,
# which plots data_p / rate_n).
ggboxplot(data_n, x = "items", y = "rate_n")
model_n <- lm(rate_n ~ as.factor(items), data = data_n)
ggqqplot(residuals(model_n))
ggqqplot(data_n, "rate_n", facet.by = "items")
plot(model_n, 1) # residuals vs fitted: homogeneity of variance
plot(model_n, 2) # normal Q-Q of the residuals
plot(model_n, 4) # Cook's distance
summary(model_n)
##
## Call:
## lm(formula = rate_n ~ as.factor(items), data = data_n)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37500 -0.15000 -0.08929 0.19643 0.48214
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.527273 0.029493 17.878 <2e-16 ***
## as.factor(items)7 -0.009416 0.045451 -0.207 0.8362
## as.factor(items)10 -0.077273 0.045451 -1.700 0.0911 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2396 on 159 degrees of freedom
## Multiple R-squared: 0.01976, Adjusted R-squared: 0.007429
## F-statistic: 1.602 on 2 and 159 DF, p-value: 0.2046
## Anova Test
# One-way ANOVA of `rate_n` on `items` (as a factor) for the negative rows.
res.aov_n <- rstatix::anova_test(data_n, rate_n ~ as.factor(items))
## Coefficient covariances computed by hccm()
res.aov_n
## ANOVA Table (type II tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 as.factor(items) 2 159 1.602 0.205 0.02
## Post_hoc test
# Tukey HSD pairwise comparisons between `items` levels.
pwc_n <- rstatix::tukey_hsd(data_n, rate_n ~ as.factor(items))
pwc_n
## # A tibble: 3 x 9
## term group1 group2 null.value estimate conf.low conf.high p.adj p.adj.signif
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 as.fa… 5 7 0 -0.00942 -0.117 0.0981 0.977 ns
## 2 as.fa… 5 10 0 -0.0773 -0.185 0.0303 0.208 ns
## 3 as.fa… 7 10 0 -0.0679 -0.184 0.0479 0.35 ns
# Data Overview
# Cell counts of posneg ("n" / "p") by `items` level (5, 7, 10) for the full
# data and for the data_p / data_n subsets. The counts do not involve the
# response column, so they are the same whichever rate variable is analysed.
table(data$posneg, data$items)
##
## 5 7 10
## n 66 48 48
## p 56 30 40
table(data_p$posneg, data_p$items)
##
## 5 7 10
## p 56 30 40
table(data_n$posneg, data_n$items)
##
## 5 7 10
## n 66 48 48
## Histogram
# Base-graphics histograms of `rate_z`: one per `items` level (5, 7, 10) over
# all rows, then the same three levels restricted to posneg == "n" and to
# posneg == "p".
# NOTE(review): the rate_z models in this script reproduce the F statistics
# and p-values of the raw `rate` models exactly, so rate_z looks like a linear
# rescaling (e.g. a pooled z-score) of rate; confirm where the column is
# created.
hist(data$rate_z[data$items == 5])
hist(data$rate_z[data$items == 7])
hist(data$rate_z[data$items == 10])
hist(data$rate_z[data$posneg == "n" & data$items == 5])
hist(data$rate_z[data$posneg == "n" & data$items == 7])
hist(data$rate_z[data$posneg == "n" & data$items == 10])
hist(data$rate_z[data$posneg == "p" & data$items == 5])
hist(data$rate_z[data$posneg == "p" & data$items == 7])
hist(data$rate_z[data$posneg == "p" & data$items == 10])
# Histogram of `rate_z`, pooled and then faceted by posneg, by `items`, and by
# both. Exactly one histogram layer with an explicit binwidth is used: adding
# a second, default geom_histogram() layer would be overplotted by this one
# and would emit the "`stat_bin()` using `bins = 30`" message on every print.
p <- ggplot(data, aes(x = rate_z)) +
  geom_histogram(binwidth = 1)
p
p + facet_grid(posneg ~ .)
p + facet_grid(items ~ .)
p + facet_grid(items ~ posneg)
## Boxplot
# Box plots of `rate_z` for each `items` level: pooled first, then split into
# facet rows by posneg, facet rows by `items`, and an items-by-posneg grid.
p <- ggplot(data = data, mapping = aes(x = as.factor(items), y = rate_z)) +
  geom_boxplot()
p
p + facet_grid(rows = vars(posneg))
p + facet_grid(rows = vars(items))
p + facet_grid(rows = vars(items), cols = vars(posneg))
# Analysis: Positive Review
## Model assumption test
# n, mean and SD of `rate_z` within each `items` level, positive ("p") rows
# only.
filter(data, posneg == "p") %>%
  group_by(as.factor(items)) %>%
  get_summary_stats(rate_z, type = "mean_sd")
## # A tibble: 3 x 5
## `as.factor(items)` variable n mean sd
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 5 rate_z 56 -0.196 0.292
## 2 7 rate_z 30 0.675 0.386
## 3 10 rate_z 40 1.65 0.485
# Positive rows: box plot, then a linear model of `rate_z` on `items` (treated
# as a factor), followed by residual diagnostics and the model summary.
ggboxplot(data = data_p, x = "items", y = "rate_z")
model_p <- lm(formula = rate_z ~ as.factor(items), data = data_p)
ggqqplot(data = residuals(model_p))
ggqqplot(data = data_p, x = "rate_z", facet.by = "items")
plot(model_p, which = 1) # residuals vs fitted: homogeneity of variance
plot(model_p, which = 2) # normal Q-Q of the residuals
plot(model_p, which = 4) # Cook's distance
summary(model_p)
##
## Call:
## lm(formula = rate_z ~ as.factor(items), data = data_p)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.03111 -0.17185 -0.04603 0.37234 0.68741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.19563 0.05136 -3.809 0.000219 ***
## as.factor(items)7 0.87051 0.08695 10.011 < 2e-16 ***
## as.factor(items)10 1.84433 0.07956 23.181 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3843 on 123 degrees of freedom
## Multiple R-squared: 0.8142, Adjusted R-squared: 0.8112
## F-statistic: 269.5 on 2 and 123 DF, p-value: < 2.2e-16
## Anova Test
# One-way ANOVA of `rate_z` on `items` (as a factor) for the positive rows.
res.aov_p <- rstatix::anova_test(data_p, rate_z ~ as.factor(items))
## Coefficient covariances computed by hccm()
res.aov_p
## ANOVA Table (type II tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 as.factor(items) 2 123 269.491 1.11e-45 * 0.814
## Post_hoc test
# Tukey HSD pairwise comparisons between `items` levels.
pwc_p <- rstatix::tukey_hsd(data_p, rate_z ~ as.factor(items))
pwc_p
## # A tibble: 3 x 9
## term group1 group2 null.value estimate conf.low conf.high p.adj
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 as.f… 5 7 0 0.871 0.664 1.08 9.51e-14
## 2 as.f… 5 10 0 1.84 1.66 2.03 6.72e-14
## 3 as.f… 7 10 0 0.974 0.754 1.19 9.05e-14
## # … with 1 more variable: p.adj.signif <chr>
# Analysis: Negative Review
# n, mean and SD of `rate_z` within each `items` level, negative ("n") rows
# only.
filter(data, posneg == "n") %>%
  group_by(as.factor(items)) %>%
  get_summary_stats(rate_z, type = "mean_sd")
## # A tibble: 3 x 5
## `as.factor(items)` variable n mean sd
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 5 rate_z 66 -0.828 0.52
## 2 7 rate_z 48 -0.403 0.707
## 3 10 rate_z 48 -0.027 1.03
# Negative rows: box plot, then a linear model of `rate_z` on `items` (treated
# as a factor), followed by residual diagnostics and the model summary.
# The box plot uses data_n and rate_z so that it shows the same subset and
# response that model_n is fitted to (mirroring the positive-row analysis,
# which plots data_p / rate_z).
ggboxplot(data_n, x = "items", y = "rate_z")
model_n <- lm(rate_z ~ as.factor(items), data = data_n)
ggqqplot(residuals(model_n))
ggqqplot(data_n, "rate_z", facet.by = "items")
plot(model_n, 1) # residuals vs fitted: homogeneity of variance
plot(model_n, 2) # normal Q-Q of the residuals
plot(model_n, 4) # Cook's distance
summary(model_n)
##
## Call:
## lm(formula = rate_z ~ as.factor(items), data = data_n)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5037 -0.6444 -0.2685 0.5859 1.9333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.82752 0.09326 -8.873 1.41e-15 ***
## as.factor(items)7 0.42475 0.14373 2.955 0.0036 **
## as.factor(items)10 0.80067 0.14373 5.571 1.06e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7577 on 159 degrees of freedom
## Multiple R-squared: 0.1653, Adjusted R-squared: 0.1548
## F-statistic: 15.74 on 2 and 159 DF, p-value: 5.785e-07
## Anova Test
# One-way ANOVA of `rate_z` on `items` (as a factor) for the negative rows.
res.aov_n <- rstatix::anova_test(data_n, rate_z ~ as.factor(items))
## Coefficient covariances computed by hccm()
res.aov_n
## ANOVA Table (type II tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 as.factor(items) 2 159 15.742 5.79e-07 * 0.165
## Post_hoc test
# Tukey HSD pairwise comparisons between `items` levels.
pwc_n <- rstatix::tukey_hsd(data_n, rate_z ~ as.factor(items))
pwc_n
## # A tibble: 3 x 9
## term group1 group2 null.value estimate conf.low conf.high p.adj
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 as.f… 5 7 0 0.425 0.0847 0.765 1.00e-2
## 2 as.f… 5 10 0 0.801 0.461 1.14 3.17e-7
## 3 as.f… 7 10 0 0.376 0.0100 0.742 4.25e-2
## # … with 1 more variable: p.adj.signif <chr>