# Load the tidyverse packages.
library(tidyverse)

# Read the dataset from the CSV export of a published Google spreadsheet.
df <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vStv7Pr69DtRKv6Nw6gVBep8hbT3pEeO6B1vNwxK_1DUHgpoTgbuRpZ4SvgtHFQnBZJVGeeQVyRuXZl/pub?gid=20675042&single=true&output=csv"
)
Parsed with column specification:
cols(
`Merijumu punkts` = col_character(),
Virsmas = col_character(),
`1_1` = col_integer(),
`1_2` = col_integer(),
`1_3` = col_integer(),
`1_4` = col_integer(),
`1_5` = col_integer(),
`1_6` = col_integer(),
`1_7` = col_integer(),
`1_8` = col_integer(),
`1_9` = col_integer(),
`1_10` = col_integer(),
Measurement = col_character()
)
# Quick look at the raw data: the first rows, then a per-column summary.
df %>% head()
df %>% summary()
Mērījumu punkts Virsmas 1_1 1_2 1_3 1_4 1_5 1_6
Length:180 Length:180 Min. :-60.00 Min. :-90.00 Min. :-90.00 Min. :-90.00 Min. :-90.00 Min. :-60.00
Class :character Class :character 1st Qu.: 52.50 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
Mode :character Mode :character Median : 90.00 Median : 30.00 Median : 30.00 Median : 30.00 Median : 30.00 Median : 30.00
Mean : 80.33 Mean : 31.28 Mean : 35.44 Mean : 37.67 Mean : 34.67 Mean : 38.78
3rd Qu.:120.00 3rd Qu.: 60.00 3rd Qu.: 60.00 3rd Qu.: 60.00 3rd Qu.: 60.00 3rd Qu.: 60.00
Max. :200.00 Max. :200.00 Max. :200.00 Max. :250.00 Max. :250.00 Max. :250.00
NA's :120
1_7 1_8 1_9 1_10 Measurement
Min. :-90.00 Min. :-90.00 Min. :-60.00 Min. :-60.00 Length:180
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 Class :character
Median : 30.00 Median : 30.00 Median : 30.00 Median : 0.00 Mode :character
Mean : 38.17 Mean : 38.94 Mean : 31.72 Mean : 25.17
3rd Qu.: 60.00 3rd Qu.: 60.00 3rd Qu.: 30.00 3rd Qu.: 30.00
Max. :250.00 Max. :250.00 Max. :250.00 Max. :250.00
# Normalise the column names (requires the janitor package), e.g. `1_1`
# becomes x1_1, then reshape from wide to long: the ten columns x1_1 ... x1_10
# are stacked so that the former column name goes to `comparison` and the cell
# content goes to `value`.
# pivot_longer() replaces the superseded gather(); it produces the same
# columns and the same rows, only in a different row order.
df <- df %>%
  janitor::clean_names() %>%
  pivot_longer(
    cols = x1_1:x1_10,
    names_to = "comparison",
    values_to = "value"
  )
# Per-column summary of the reshaped (long) data.
df %>% summary()
merijumu_punkts virsmas measurement comparison value
Length:1800 Length:1800 Length:1800 Length:1800 Min. :-90.00
Class :character Class :character Class :character Class :character 1st Qu.: 0.00
Mode :character Mode :character Mode :character Mode :character Median : 30.00
Mean : 36.28
3rd Qu.: 60.00
Max. :250.00
NA's :120
# Drop the merijumu_punkts column (not used from here on) and replace value by
# its absolute value so that no negative values remain.
df <- df %>%
  select(-merijumu_punkts) %>%
  mutate(value = abs(value))
Ready for analysis
# Descriptive statistics of value for each comparison, restricted to the
# extraoral and intraoral measurements (the intra_vs_extra level is excluded).
#
# Rows with a missing value are dropped up front so that every statistic is
# computed on the same observations. Before, mean/sd/min/max were called
# without na.rm and returned NA for any comparison containing a missing value,
# while the quantiles (na.rm = T) silently ignored them. A comparison whose
# values are all missing no longer appears in the result, which also covers
# the case the formerly commented-out `comparison != "x1_1"` filter targeted.
df %>%
  filter(measurement %in% c("extraoral", "intraoral")) %>%
  filter(!is.na(value)) %>%
  group_by(comparison) %>%
  summarise(
    Mean = mean(value),
    sd = sd(value),
    Min = min(value),
    Max = max(value),
    Q25 = quantile(value, probs = 0.25),
    Median = quantile(value, probs = 0.5),
    Q75 = quantile(value, probs = 0.75)
  )
# Box plot of value per comparison for the extraoral/intraoral rows only
# (intra_vs_extra excluded); the x axis is reordered with fct_reorder().
df %>%
  filter(measurement %in% c("extraoral", "intraoral")) %>%
  ggplot(mapping = aes(x = fct_reorder(comparison, value), y = value)) +
  geom_boxplot() +
  theme_minimal()
# Subset holding only the extraoral and intraoral measurements.
df_i_e <- filter(df, measurement %in% c("extraoral", "intraoral"))
# One-way ANOVA of value on comparison for the extraoral/intraoral subset;
# printing the fit shows the sums of squares and degrees of freedom.
aov(formula = value ~ comparison, data = df_i_e)
Call:
aov(formula = value ~ comparison, data = df_i_e)
Terms:
comparison Residuals
Sum of Squares 3296.7 336502.5
Deg. of Freedom 8 1071
Residual standard error: 17.72554
Estimated effects may be unbalanced
120 observations deleted due to missingness
# ANOVA table (F value and p-value) for value ~ comparison on df_i_e.
aov(value ~ comparison, data = df_i_e) %>%
  summary()
Df Sum Sq Mean Sq F value Pr(>F)
comparison 8 3297 412.1 1.312 0.234
Residuals 1071 336502 314.2
120 observations deleted due to missingness
# Descriptive statistics of value for each virsmas level, restricted to the
# extraoral and intraoral measurements (intra_vs_extra excluded). Missing
# values are ignored in every statistic.
df %>%
  filter(measurement %in% c("extraoral", "intraoral")) %>%
  # filter(comparison != "x1_1") %>%
  group_by(virsmas) %>%
  summarise(
    Mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    Min = min(value, na.rm = TRUE),
    Max = max(value, na.rm = TRUE),
    Q25 = quantile(value, probs = 0.25, na.rm = TRUE),
    Median = quantile(value, probs = 0.5, na.rm = TRUE),
    Q75 = quantile(value, probs = 0.75, na.rm = TRUE)
  )
# Box plot of value per virsmas level for the extraoral/intraoral rows only
# (intra_vs_extra excluded); the x axis is reordered with fct_reorder().
df %>%
  filter(measurement %in% c("extraoral", "intraoral")) %>%
  ggplot(mapping = aes(x = fct_reorder(virsmas, value), y = value)) +
  geom_boxplot() +
  theme_minimal()
# One-way ANOVA of value on virsmas for the extraoral/intraoral subset;
# printing the fit shows the sums of squares and degrees of freedom.
aov(formula = value ~ virsmas, data = df_i_e)
Call:
aov(formula = value ~ virsmas, data = df_i_e)
Terms:
virsmas Residuals
Sum of Squares 2313.2 337486.0
Deg. of Freedom 3 1076
Residual standard error: 17.71013
Estimated effects may be unbalanced
120 observations deleted due to missingness
# ANOVA table (F value and p-value) for value ~ virsmas on df_i_e.
aov(value ~ virsmas, data = df_i_e) %>%
  summary()
Df Sum Sq Mean Sq F value Pr(>F)
virsmas 3 2313 771.1 2.458 0.0614 .
Residuals 1076 337486 313.6
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
120 observations deleted due to missingness
# Descriptive statistics of value for the extraoral and the intraoral
# measurements (one row per measurement level). Missing values are ignored in
# every statistic.
df %>%
  filter(measurement %in% c("extraoral", "intraoral")) %>%
  group_by(measurement) %>%
  summarise(
    Mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    Min = min(value, na.rm = TRUE),
    Max = max(value, na.rm = TRUE),
    Q25 = quantile(value, probs = 0.25, na.rm = TRUE),
    Median = quantile(value, probs = 0.5, na.rm = TRUE),
    Q75 = quantile(value, probs = 0.75, na.rm = TRUE)
  )
# Box plot of value for extraoral vs intraoral (intra_vs_extra excluded);
# the x axis is reordered with fct_reorder().
df %>%
  filter(measurement %in% c("extraoral", "intraoral")) %>%
  ggplot(mapping = aes(x = fct_reorder(measurement, value), y = value)) +
  geom_boxplot() +
  theme_minimal()
# One-way ANOVA of value on measurement (extraoral vs intraoral);
# printing the fit shows the sums of squares and degrees of freedom.
aov(formula = value ~ measurement, data = df_i_e)
Call:
aov(formula = value ~ measurement, data = df_i_e)
Terms:
measurement Residuals
Sum of Squares 47600.83 292198.33
Deg. of Freedom 1 1078
Residual standard error: 16.46378
Estimated effects may be unbalanced
120 observations deleted due to missingness
# ANOVA table (F value and p-value) for value ~ measurement on df_i_e.
aov(value ~ measurement, data = df_i_e) %>%
  summary()
Df Sum Sq Mean Sq F value Pr(>F)
measurement 1 47601 47601 175.6 <2e-16 ***
Residuals 1078 292198 271
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
120 observations deleted due to missingness
# Descriptive statistics of value for each comparison, using only the
# intra_vs_extra measurements. Missing values are ignored in every statistic.
df %>%
  filter(measurement == "intra_vs_extra") %>%
  group_by(comparison) %>%
  summarise(
    Mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    Min = min(value, na.rm = TRUE),
    Max = max(value, na.rm = TRUE),
    Q25 = quantile(value, probs = 0.25, na.rm = TRUE),
    Median = quantile(value, probs = 0.5, na.rm = TRUE),
    Q75 = quantile(value, probs = 0.75, na.rm = TRUE)
  )
# Box plot of value per comparison for the intra_vs_extra measurements.
ggplot(
  data = filter(df, measurement == "intra_vs_extra"),
  mapping = aes(x = comparison, y = value)
) +
  geom_boxplot() +
  theme_minimal()
# Subset holding only the intra_vs_extra measurements.
df_intravsextra <- filter(df, measurement == "intra_vs_extra")
# One-way ANOVA of value on comparison for the intra_vs_extra subset;
# printing the fit shows the sums of squares and degrees of freedom.
aov(formula = value ~ comparison, data = df_intravsextra)
Call:
aov(formula = value ~ comparison, data = df_intravsextra)
Terms:
comparison Residuals
Sum of Squares 31107.3 1712100.0
Deg. of Freedom 9 590
Residual standard error: 53.86896
Estimated effects may be unbalanced
# ANOVA table (F value and p-value) for value ~ comparison on df_intravsextra.
aov(value ~ comparison, data = df_intravsextra) %>%
  summary()
Df Sum Sq Mean Sq F value Pr(>F)
comparison 9 31107 3456 1.191 0.298
Residuals 590 1712100 2902
# Descriptive statistics of value for each virsmas level, using only the
# intra_vs_extra measurements. Missing values are ignored in every statistic.
df %>%
  filter(measurement == "intra_vs_extra") %>%
  group_by(virsmas) %>%
  summarise(
    Mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    Min = min(value, na.rm = TRUE),
    Max = max(value, na.rm = TRUE),
    Q25 = quantile(value, probs = 0.25, na.rm = TRUE),
    Median = quantile(value, probs = 0.5, na.rm = TRUE),
    Q75 = quantile(value, probs = 0.75, na.rm = TRUE)
  )
# Box plot of value per virsmas level for the intra_vs_extra measurements;
# the x axis is reordered with fct_reorder().
ggplot(
  data = filter(df, measurement == "intra_vs_extra"),
  mapping = aes(x = fct_reorder(virsmas, value), y = value)
) +
  geom_boxplot() +
  theme_minimal()
# (Re)build the intra_vs_extra subset so the virsmas ANOVA that follows can be
# run without relying on an earlier section having created it.
df_intravsextra <- filter(df, measurement == "intra_vs_extra")
# One-way ANOVA of value on virsmas for the intra_vs_extra subset;
# printing the fit shows the sums of squares and degrees of freedom.
aov(formula = value ~ virsmas, data = df_intravsextra)
Call:
aov(formula = value ~ virsmas, data = df_intravsextra)
Terms:
virsmas Residuals
Sum of Squares 663971.4 1079235.9
Deg. of Freedom 3 596
Residual standard error: 42.55348
Estimated effects may be unbalanced
# ANOVA table (F value and p-value) for value ~ virsmas on df_intravsextra.
aov(value ~ virsmas, data = df_intravsextra) %>%
  summary()
Df Sum Sq Mean Sq F value Pr(>F)
virsmas 3 663971 221324 122.2 <2e-16 ***
Residuals 596 1079236 1811
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1