Packages

library(tidyverse)
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 2.2.1     v purrr   0.2.4
v tibble  1.4.2     v dplyr   0.7.4
v tidyr   0.8.0     v stringr 1.3.0
v readr   1.1.1     v forcats 0.3.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()

Dataset and data cleaning

# Read the published Google Sheets tab (gid=1261585400) as CSV into `df`.
df <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vStv7Pr69DtRKv6Nw6gVBep8hbT3pEeO6B1vNwxK_1DUHgpoTgbuRpZ4SvgtHFQnBZJVGeeQVyRuXZl/pub?gid=1261585400&single=true&output=csv"
)
Missing column names filled in: 'X6' [6], 'X7' [7], 'X9' [9]Parsed with column specification:
cols(
  N = col_integer(),
  `Merijumu punkts` = col_character(),
  Comparison = col_character(),
  `difference i/o` = col_integer(),
  `difference e/0` = col_integer(),
  X6 = col_character(),
  X7 = col_character(),
  Clave = col_character(),
  X9 = col_character()
)
# Normalise the column names, keep the columns from `merijumu_punkts` through
# `difference_e_0`, and give the two difference columns descriptive names.
df <- df %>%
  janitor::clean_names() %>%
  select(merijumu_punkts:difference_e_0) %>%
  rename(
    intraoral = difference_i_o,
    extraoral = difference_e_0
  )

EDA

Summary

# Per-column overview of the cleaned data.
df %>%
  summary()
 merijumu_punkts     comparison          intraoral       extraoral    
 Length:540         Length:540         Min.   : 0.00   Min.   : 0.00  
 Class :character   Class :character   1st Qu.:30.00   1st Qu.: 0.00  
 Mode  :character   Mode  :character   Median :30.00   Median : 0.00  
                                       Mean   :26.33   Mean   :13.06  
                                       3rd Qu.:30.00   3rd Qu.:30.00  
                                       Max.   :90.00   Max.   :30.00  

Wide to long dataset (for easier calculations)

# Stack the intraoral and extraoral columns into key/value pairs:
# `measurement` holds the former column name, `value` the recorded difference.
df <- gather(
  df,
  key = "measurement",
  value = "value",
  intraoral:extraoral
)

N of measurements

# Number of observations for every comparison, one column per measurement type.
df %>%
  group_by(comparison, measurement) %>%
  summarise(n_obs = n()) %>%
  spread(key = measurement, value = n_obs)

Mean and sd by type of measurement

# Mean of `value` for every comparison, one column per measurement type.
df %>%
  group_by(comparison, measurement) %>%
  summarise(avg = mean(value)) %>%
  spread(key = measurement, value = avg)

# Standard deviation of `value`, laid out the same way.
df %>%
  group_by(comparison, measurement) %>%
  summarise(std_dev = sd(value)) %>%
  spread(key = measurement, value = std_dev)

Distributions

# Histogram of the differences (4 bins), one panel row per measurement type.
ggplot(df, aes(x = value)) +
  geom_histogram(bins = 4) +
  facet_grid(measurement ~ .) +
  labs(
    title = "Distribution of measurement",
    x = "Difference",
    y = "Count"
  ) +
  theme_minimal()

Boxplot

# Boxplot of the differences for each measurement type.
ggplot(df, aes(x = measurement, y = value)) +
  geom_boxplot() +
  theme_minimal()

# Bar chart of the mean difference per comparison, bars ordered by that mean,
# with one panel per measurement type.
ggplot(
  data = df %>%
    group_by(comparison, measurement) %>%
    summarise(Mean = mean(value)),
  mapping = aes(x = fct_reorder(comparison, Mean), y = Mean)
) +
  geom_col() +
  facet_grid(. ~ measurement) +
  theme_minimal()

Se observan diferencias entre las mediciones intraorales y las extraorales. En general, las diferencias son menores para las extraorales.

¿Hay diferencias entre grupos?

Tres factores: intra vs. extraoral (2 niveles); lugar de la medición (19 niveles); comparación (1 vs 2, etc.).

# Additive ANOVA of the measured difference on measuring point, comparison
# pair and measurement type (intraoral vs extraoral).
# Variables are named bare and resolved through `data = df` instead of writing
# `df$...` inside the formula: the term labels are then not prefixed with
# `df$`, and the fitted model is not tied to the global `df` vectors, so it can
# be reused with `update()` / `predict()` on other data.
comparacion <- aov(
  value ~ merijumu_punkts + comparison + measurement,
  data = df
)
summary(comparacion)
                     Df Sum Sq Mean Sq F value Pr(>F)    
df$merijumu_punkts   59  53949     914   3.935 <2e-16 ***
df$comparison         8   3297     412   1.773 0.0785 .  
df$measurement        1  47601   47601 204.826 <2e-16 ***
Residuals          1011 234952     232                   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Interpretación: hay diferencias significativas entre puntos y entre intra/extra

# Mean difference per measuring point and measurement type, drawn as horizontal
# boxplots with the points ordered by their mean.
ggplot(
  data = df %>%
    group_by(merijumu_punkts, measurement) %>%
    summarise(mean = mean(value)),
  mapping = aes(x = fct_reorder(merijumu_punkts, mean), y = mean)
) +
  geom_boxplot() +
  coord_flip() +
  theme_minimal()

# Column chart of the raw differences per measuring point (points ordered via
# fct_reorder on `value`), with one panel per measurement type.
ggplot(df, aes(x = fct_reorder(merijumu_punkts, value), y = value)) +
  geom_col() +
  facet_wrap(~measurement)

NA

library(tidyverse)
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 2.2.1     v purrr   0.2.4
v tibble  1.4.2     v dplyr   0.7.4
v tidyr   0.8.0     v stringr 1.3.0
v readr   1.1.1     v forcats 0.3.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()

Aira 2

# Read the published Google Sheets tab (gid=20675042) as CSV into `df2`.
df2 <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vStv7Pr69DtRKv6Nw6gVBep8hbT3pEeO6B1vNwxK_1DUHgpoTgbuRpZ4SvgtHFQnBZJVGeeQVyRuXZl/pub?gid=20675042&single=true&output=csv"
)
Parsed with column specification:
cols(
  `Merijumu punkts` = col_character(),
  `1_1` = col_integer(),
  `1_2` = col_integer(),
  `1_3` = col_integer(),
  `1_4` = col_integer(),
  `1_5` = col_integer(),
  `1_6` = col_integer(),
  `1_7` = col_integer(),
  `1_8` = col_integer(),
  `1_9` = col_integer(),
  `1_10` = col_integer(),
  Measurement = col_character()
)
# Normalise the column names and preview the first rows.
df2 <- df2 %>%
  janitor::clean_names()
head(df2)

# Stack the comparison columns x1_1 ... x1_10 into key/value pairs.
df2 <- gather(
  df2,
  key = "comparison",
  value = "value",
  x1_1:x1_10
)

# Show the rows with the largest values first to spot outliers.
arrange(df2, desc(value))
package ‘bindrcpp’ was built under R version 3.4.4

voy a reemplazar el extraoral = 300 por 30

# Row position(s) whose value equals 300.
which(df2[["value"]] == 300)
[1] 1620

lo cambio

# Correct the data-entry error (300 recorded instead of 30).
# Rows are located by value rather than by a hard-coded position: the data come
# from a live spreadsheet, so a fixed index would overwrite the wrong row if
# the sheet were reordered or extended. `which()` drops NA comparisons, so
# missing values are left untouched, and the statement is a no-op when no 300
# is present.
df2$value[which(df2$value == 300)] <- 30

ordeno los factores

# Turn `measurement` into a factor with an explicit level order, then count the
# rows in each level.
df2 <- df2 %>%
  mutate(
    measurement = factor(
      measurement,
      levels = c("intraoral", "extraoral", "intra_vs_extra")
    )
  )
table(df2[["measurement"]])

     intraoral      extraoral intra_vs_extra 
           600            600            600 

cambio por valores absolutos

# Replace every value by its absolute value.
df2 <- df2 %>%
  mutate(value = abs(value))

MEAN per group

SD

Boxplot per group

intra and extraoral

intra_vs_extra

Inferential

# Count the rows per measurement level in `df2_intra_vs_extra`.
# NOTE(review): `df2_intra_vs_extra` is not created in the visible code;
# presumably it is `df2` restricted to the "intra_vs_extra" level - confirm.
table(df2_intra_vs_extra$measurement)

intra_vs_extra 
           600 
# Print the summary table of `model_intra_vs_extra`.
# NOTE(review): the model is fitted outside the visible code; confirm its
# formula and data before interpreting the table.
summary(model_intra_vs_extra)
                 Df  Sum Sq Mean Sq F value   Pr(>F)    
merijumu_punkts  59 1474927   24999  55.969  < 2e-16 ***
comparison        9   31107    3456   7.738 9.78e-11 ***
Residuals       531  237173     447                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Overall mean of `value` in `df2_intra_vs_extra` (no NA handling is applied).
mean(df2_intra_vs_extra$value)
[1] 89.43333

Convert the merijumu_punkts column

# Remove every space character from the measuring-point labels.
df2_intra_vs_extra[["merijumu_punkts"]] <- str_replace_all(
  string = df2_intra_vs_extra[["merijumu_punkts"]],
  pattern = " ",
  replacement = ""
)
Warning messages:
1: Unknown or uninitialised column: 'str_replace_all'. 
2: Unknown or uninitialised column: 'str_replace_all'. 
# Keep only the part of each measuring-point label that precedes the first "L"
# and then the first "V".
# - `NA` in `into` discards the trailing piece directly. The original wrote it
#   to a helper column called "delete" in both calls, which reused the same
#   column name twice and left the helper in the data.
# - `fill = "right"` makes labels without the separator an expected case
#   instead of a warning on every row.
# - `extra = "drop"` keeps the same first piece when a label holds the
#   separator more than once, without the warning.
df2_intra_vs_extra <- df2_intra_vs_extra %>%
  separate(
    merijumu_punkts,
    into = c("merijumu_punkts", NA),
    sep = "L",
    extra = "drop",
    fill = "right"
  ) %>%
  separate(
    merijumu_punkts,
    into = c("merijumu_punkts", NA),
    sep = "V",
    extra = "drop",
    fill = "right"
  )
Expected 2 pieces. Missing pieces filled with `NA` in 600 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].Expected 2 pieces. Missing pieces filled with `NA` in 600 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

