Packages
library(tidyverse)
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 2.2.1     v purrr   0.2.4
v tibble  1.4.2     v dplyr   0.7.4
v tidyr   0.8.0     v stringr 1.3.0
v readr   1.1.1     v forcats 0.3.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
Dataset and data cleaning
# Read the published Google Sheet tab (gid=1261585400) as CSV.
# readr auto-names the unnamed columns (X6, X7, X9) while parsing.
df <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vStv7Pr69DtRKv6Nw6gVBep8hbT3pEeO6B1vNwxK_1DUHgpoTgbuRpZ4SvgtHFQnBZJVGeeQVyRuXZl/pub?gid=1261585400&single=true&output=csv"
)
Missing column names filled in: 'X6' [6], 'X7' [7], 'X9' [9]
Parsed with column specification:
cols(
N = col_integer(),
`Merijumu punkts` = col_character(),
Comparison = col_character(),
`difference i/o` = col_integer(),
`difference e/0` = col_integer(),
X6 = col_character(),
X7 = col_character(),
Clave = col_character(),
X9 = col_character()
)
# Standardise the column names, keep only the columns from merijumu_punkts
# through difference_e_0, and give the two difference columns descriptive
# names.
df <- df %>%
  janitor::clean_names() %>%
  select(merijumu_punkts:difference_e_0) %>%
  rename(
    intraoral = difference_i_o,
    extraoral = difference_e_0
  )
EDA
Summary
# Per-column overview of the cleaned data.
df %>%
  summary()
merijumu_punkts comparison intraoral extraoral
Length:540 Length:540 Min. : 0.00 Min. : 0.00
Class :character Class :character 1st Qu.:30.00 1st Qu.: 0.00
Mode :character Mode :character Median :30.00 Median : 0.00
Mean :26.33 Mean :13.06
3rd Qu.:30.00 3rd Qu.:30.00
Max. :90.00 Max. :30.00
Wide to long dataset (for easier calculations)
# Stack the intraoral and extraoral columns into key/value pairs:
# `measurement` receives the former column name, `value` the difference.
df <- gather(df, key = "measurement", value = "value", intraoral:extraoral)
N of measurements
# Number of rows for every comparison, with one column per measurement type.
df %>%
  group_by(comparison, measurement) %>%
  summarise(N = n()) %>%
  spread(key = measurement, value = N)
Mean and sd by type of measurement
# Mean difference for every comparison, one column per measurement type.
df %>%
  group_by(comparison, measurement) %>%
  summarise(Mean = mean(value)) %>%
  spread(key = measurement, value = Mean)

# Standard deviation of the difference, laid out the same way.
df %>%
  group_by(comparison, measurement) %>%
  summarise(sd = sd(value)) %>%
  spread(key = measurement, value = sd)
Distributions
# Histogram of the differences (4 bins), one panel row per measurement type.
ggplot(df, aes(x = value)) +
  geom_histogram(bins = 4) +
  facet_grid(measurement ~ .) +
  theme_minimal() +
  labs(
    title = "Distribution of measurement",
    x = "Difference",
    y = "Count"
  )

Boxplot
# One box per measurement type showing the spread of the differences.
ggplot(df, aes(x = measurement, y = value)) +
  geom_boxplot() +
  theme_minimal()

# Mean difference per comparison as columns, ordered by the mean,
# with one panel per measurement type.
df %>%
  group_by(comparison, measurement) %>%
  summarise(Mean = mean(value)) %>%
  ggplot(mapping = aes(x = fct_reorder(comparison, Mean), y = Mean)) +
  geom_col() +
  facet_grid(. ~ measurement) +
  theme_minimal()

Se observan diferencias entre las mediciones intraorales y las extraorales. En general, las diferencias son menores para las extraorales.
¿Hay diferencias entre grupos?
Tres factores: (1) intra vs. extraoral: 2 niveles; (2) lugar de la medición: 19 niveles; (3) comparación (1 vs. 2, etc.).
# Additive ANOVA of the difference on measurement point, comparison and
# measurement type.
# The columns are referenced through `data = df` rather than `df$...`, so the
# fitted model keeps plain term names and works with helpers that look the
# variables up in a data frame (e.g. predict(newdata = ), TukeyHSD()).
# The sums of squares and F tests are unchanged; only the term labels in the
# printed table lose their `df$` prefix.
comparacion <- aov(
  value ~ merijumu_punkts + comparison + measurement,
  data = df
)
summary(comparacion)
Df Sum Sq Mean Sq F value Pr(>F)
df$merijumu_punkts 59 53949 914 3.935 <2e-16 ***
df$comparison 8 3297 412 1.773 0.0785 .
df$measurement 1 47601 47601 204.826 <2e-16 ***
Residuals 1011 234952 232
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Interpretación: hay diferencias significativas entre puntos y entre intra/extra
# Mean value per measurement point and measurement type, drawn as one
# horizontal box per point (points ordered via fct_reorder on the mean).
df %>%
  group_by(merijumu_punkts, measurement) %>%
  summarise(mean = mean(value)) %>%
  # spread(measurement, mean) %>%
  ggplot(mapping = aes(x = fct_reorder(merijumu_punkts, mean), y = mean)) +
  geom_boxplot() +
  theme_minimal() +
  coord_flip()

# Columns of the raw values per measurement point (ordered via fct_reorder
# on value), with one panel per measurement type.
ggplot(df, aes(x = fct_reorder(merijumu_punkts, value), y = value)) +
  geom_col() +
  #coord_flip() +
  facet_wrap(~measurement)

NA
library(tidyverse)
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 2.2.1     v purrr   0.2.4
v tibble  1.4.2     v dplyr   0.7.4
v tidyr   0.8.0     v stringr 1.3.0
v readr   1.1.1     v forcats 0.3.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
Aira 2
# Read the published Google Sheet tab (gid=20675042) as CSV; it has one
# column per comparison (1_1 ... 1_10) plus the point and measurement labels.
df2 <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vStv7Pr69DtRKv6Nw6gVBep8hbT3pEeO6B1vNwxK_1DUHgpoTgbuRpZ4SvgtHFQnBZJVGeeQVyRuXZl/pub?gid=20675042&single=true&output=csv"
)
Parsed with column specification:
cols(
`Merijumu punkts` = col_character(),
`1_1` = col_integer(),
`1_2` = col_integer(),
`1_3` = col_integer(),
`1_4` = col_integer(),
`1_5` = col_integer(),
`1_6` = col_integer(),
`1_7` = col_integer(),
`1_8` = col_integer(),
`1_9` = col_integer(),
`1_10` = col_integer(),
Measurement = col_character()
)
# Standardise the column names and preview the first rows.
df2 <- df2 %>%
  janitor::clean_names()
df2 %>%
  head()

# Stack the ten comparison columns (x1_1 ... x1_10) into key/value pairs.
df2 <- gather(df2, key = "comparison", value = "value", x1_1:x1_10)

# Show the largest values first to spot outliers.
arrange(df2, desc(value))
package ‘bindrcpp’ was built under R version 3.4.4
voy a reemplazar el extraoral = 300 por 30
# Row position(s) whose value equals 300.
which(df2[["value"]] == 300)
[1] 1620
lo cambio
# Replace the value 300 with 30 (the notes treat 300 as a mistaken 30).
# Rows are selected by value instead of the hard-coded position 1620, so the
# correction still hits the right cell if the sheet gains, loses or reorders
# rows. which() drops NA comparisons, so missing values are left untouched.
df2$value[which(df2$value == 300)] <- 30
ordeno los factores
# Turn measurement into a factor with an explicit level order, then check
# how many rows fall into each level.
df2[["measurement"]] <- factor(
  df2[["measurement"]],
  levels = c("intraoral", "extraoral", "intra_vs_extra")
)
table(df2[["measurement"]])
intraoral extraoral intra_vs_extra
600 600 600
cambio por valores absolutos
# Work with the magnitude of each difference, ignoring its sign.
df2[["value"]] <- abs(df2[["value"]])
Boxplot per group
intra and extraoral

Inferential
# Row count per measurement level in the intra-vs-extra subset.
table(df2_intra_vs_extra[["measurement"]])
intra_vs_extra
600
# Print the summary table of model_intra_vs_extra.
# NOTE(review): the model is fitted outside the code visible here; confirm
# its formula against the printed terms (merijumu_punkts, comparison).
summary(model_intra_vs_extra)
Df Sum Sq Mean Sq F value Pr(>F)
merijumu_punkts 59 1474927 24999 55.969 < 2e-16 ***
comparison 9 31107 3456 7.738 9.78e-11 ***
Residuals 531 237173 447
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Overall mean of value in the intra-vs-extra subset.
mean(df2_intra_vs_extra[["value"]])
[1] 89.43333

Convert the merijumu_punkts column
# Remove every space from the measurement-point labels.
df2_intra_vs_extra[["merijumu_punkts"]] <- str_replace_all(
  string = df2_intra_vs_extra[["merijumu_punkts"]],
  pattern = " ",
  replacement = ""
)
Warning messages:
1: Unknown or uninitialised column: 'str_replace_all'.
2: Unknown or uninitialised column: 'str_replace_all'.
# Keep only the part of each measurement-point label that precedes the first
# "L" or "V"; labels containing neither letter are left unchanged.
# This yields the same merijumu_punkts values as splitting on "L" and then on
# "V" with separate(), but it does not warn about missing pieces on every row
# and does not leave throw-away `delete` column(s) in the data.
# NOTE(review): assumes nothing downstream reads the `delete` column - confirm.
df2_intra_vs_extra[["merijumu_punkts"]] <- str_replace(
  df2_intra_vs_extra[["merijumu_punkts"]],
  "[LV].*$",
  ""
)
Expected 2 pieces. Missing pieces filled with `NA` in 600 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].Expected 2 pieces. Missing pieces filled with `NA` in 600 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

