Packages
library(tidyverse)
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 2.2.1     v purrr   0.2.4
v tibble  1.4.2     v dplyr   0.7.4
v tidyr   0.8.0     v stringr 1.3.0
v readr   1.1.1     v forcats 0.3.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
Dataset and data cleaning
df <- read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vStv7Pr69DtRKv6Nw6gVBep8hbT3pEeO6B1vNwxK_1DUHgpoTgbuRpZ4SvgtHFQnBZJVGeeQVyRuXZl/pub?gid=1261585400&single=true&output=csv")
Missing column names filled in: 'X6' [6], 'X7' [7], 'X9' [9]
Parsed with column specification:
cols(
N = col_integer(),
`Merijumu punkts` = col_character(),
Comparison = col_character(),
`difference i/o` = col_integer(),
`difference e/0` = col_integer(),
X6 = col_character(),
X7 = col_character(),
Clave = col_character(),
X9 = col_character()
)
df <- janitor::clean_names(df)
df <- df %>%
select(merijumu_punkts:difference_e_0)
df <- df %>%
rename(intraoral = difference_i_o,
extraoral = difference_e_0)
EDA
Summary
summary(df)
merijumu_punkts comparison intraoral extraoral
Length:540 Length:540 Min. : 0.00 Min. : 0.00
Class :character Class :character 1st Qu.:30.00 1st Qu.: 0.00
Mode :character Mode :character Median :30.00 Median : 0.00
Mean :26.33 Mean :13.06
3rd Qu.:30.00 3rd Qu.:30.00
Max. :90.00 Max. :30.00
Wide to long dataset (for easier calculations)
df <- df %>%
gather(key = "measurement", value = "value", intraoral:extraoral)
N of measurements
df %>%
group_by(comparison, measurement) %>%
summarise(N = n()) %>%
spread(measurement, N)
Mean and sd by type of measurement
df %>%
group_by(comparison, measurement) %>%
summarise(Mean = mean(value)) %>%
spread(measurement, Mean)
df %>%
group_by(comparison, measurement) %>%
summarise(sd = sd(value)) %>%
spread(measurement, sd)
Distributions
df %>%
ggplot(aes(x = value)) +
geom_histogram(bins = 4) +
facet_grid(measurement ~ .) +
theme_minimal() +
labs(
title = "Distribution of measurement",
y = "Count",
x = "Difference"
)

Boxplot
df %>%
ggplot(aes(x = measurement, y = value)) +
geom_boxplot() +
theme_minimal()

df %>%
group_by(comparison, measurement) %>%
summarise(Mean = mean(value)) %>%
ggplot(aes(x = fct_reorder(comparison, Mean), y = Mean)) +
geom_col() +
facet_grid(. ~ measurement) +
theme_minimal()

Se observan diferencias entre las mediciones intraorales y las extraorales. En general, las diferencias son menores para las extraorales.
Hay diferencias entre grupos?
tres factores - intra vs extraoral: 2 niveles - lugar de la medición: 19 niveles - comparación (1 vs 2, etc)
comparacion <- aov(df$value ~ df$merijumu_punkts + df$comparison + df$measurement)
summary(comparacion)
Df Sum Sq Mean Sq F value Pr(>F)
df$merijumu_punkts 59 53949 914 3.935 <2e-16 ***
df$comparison 8 3297 412 1.773 0.0785 .
df$measurement 1 47601 47601 204.826 <2e-16 ***
Residuals 1011 234952 232
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Interpretación: hay diferencias significativas entre puntos y entre intra/extra
df %>%
group_by(merijumu_punkts, measurement) %>%
summarise(mean = mean(value)) %>%
# spread(measurement, mean) %>%
ggplot(aes(x = fct_reorder(merijumu_punkts, mean), y = mean)) +
geom_boxplot() +
theme_minimal() +
coord_flip()

df %>%
ggplot(aes(x = fct_reorder(merijumu_punkts, value), y = value)) +
geom_col() +
#coord_flip() +
facet_wrap(~measurement)

NA
library(tidyverse)
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 2.2.1     v purrr   0.2.4
v tibble  1.4.2     v dplyr   0.7.4
v tidyr   0.8.0     v stringr 1.3.0
v readr   1.1.1     v forcats 0.3.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
Aira 2
df2 <- read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vStv7Pr69DtRKv6Nw6gVBep8hbT3pEeO6B1vNwxK_1DUHgpoTgbuRpZ4SvgtHFQnBZJVGeeQVyRuXZl/pub?gid=20675042&single=true&output=csv")
Parsed with column specification:
cols(
`Merijumu punkts` = col_character(),
`1_1` = col_integer(),
`1_2` = col_integer(),
`1_3` = col_integer(),
`1_4` = col_integer(),
`1_5` = col_integer(),
`1_6` = col_integer(),
`1_7` = col_integer(),
`1_8` = col_integer(),
`1_9` = col_integer(),
`1_10` = col_integer(),
Measurement = col_character()
)
df2 <- janitor::clean_names(df2)
head(df2)
df2 <- df2 %>%
gather(key = "comparison", value = "value", x1_1:x1_10)
df2 %>%
arrange(desc(value))
package ‘bindrcpp’ was built under R version 3.4.4
voy a reemplazar el extraoral = 300 por 30
which(df2$value == 300)
[1] 1620
lo cambio
df2$value[1620] = 30
ordeno los factores
df2$measurement <- factor(df2$measurement, levels = c("intraoral", "extraoral", "intra_vs_extra"))
table(df2$measurement)
intraoral extraoral intra_vs_extra
600 600 600
cambio por valores absolutos
df2$value <- abs(df2$value)
Boxplot per group
intra and extraoral

Inferential
table(df2_intra_vs_extra$measurement)
intra_vs_extra
600
summary(model_intra_vs_extra)
Df Sum Sq Mean Sq F value Pr(>F)
merijumu_punkts 59 1474927 24999 55.969 < 2e-16 ***
comparison 9 31107 3456 7.738 9.78e-11 ***
Residuals 531 237173 447
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
mean(df2_intra_vs_extra$value)
[1] 89.43333

Convert the merijumu_punkts column
df2_intra_vs_extra$merijumu_punkts <- str_replace_all(df2_intra_vs_extra$merijumu_punkts, " ", "")
Warning messages:
1: Unknown or uninitialised column: 'str_replace_all'.
2: Unknown or uninitialised column: 'str_replace_all'.
df2_intra_vs_extra <- df2_intra_vs_extra %>%
separate(merijumu_punkts, c("merijumu_punkts", "delete"), sep = "L") %>%
separate(merijumu_punkts, c("merijumu_punkts", "delete"), sep = "V")
Expected 2 pieces. Missing pieces filled with `NA` in 600 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
Expected 2 pieces. Missing pieces filled with `NA` in 600 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

---
title: "R Notebook"
output: 
  html_notebook: 
    toc: yes
    toc_float: true
    fig_caption: true
---

# Packages
```{r}
library(tidyverse)
```

# Dataset and data cleaning
```{r}
# Download the sheet with gid=1261585400 from the published Google Sheets
# workbook as CSV. Column types are guessed by readr; no `col_types` is given.
df <- read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vStv7Pr69DtRKv6Nw6gVBep8hbT3pEeO6B1vNwxK_1DUHgpoTgbuRpZ4SvgtHFQnBZJVGeeQVyRuXZl/pub?gid=1261585400&single=true&output=csv")
```
```{r}

# Tidy the raw sheet in one pass:
#   1. normalise the column names with janitor,
#   2. keep the columns from `merijumu_punkts` through `difference_e_0`,
#   3. give the two difference columns descriptive names.
df <- df %>%
  janitor::clean_names() %>%
  select(merijumu_punkts:difference_e_0) %>%
  rename(
    intraoral = difference_i_o,
    extraoral = difference_e_0
  )
```

# EDA

## Summary
```{r}
# Per-column overview of the cleaned data: type/length for the character
# columns, five-number summary plus mean for the numeric ones.
summary(df)
```
## Wide to long dataset (for easier calculations)
```{r}
# Reshape from wide to long: the `intraoral` and `extraoral` columns are
# stacked, with the former column name stored in `measurement` and the cell
# content in `value`.
df <- gather(df, key = "measurement", value = "value", intraoral:extraoral)
```

## N of measurements

```{r}
# Number of rows for every comparison, shown with one column per
# measurement type.
df %>%
  group_by(comparison, measurement) %>%
  tally() %>%
  spread(key = measurement, value = n)
```
## Mean and sd by type of measurement
```{r}
# Mean of `value` for every comparison, shown with one column per
# measurement type.
df %>%
  group_by(comparison, measurement) %>%
  summarise(avg = mean(value)) %>%
  spread(key = measurement, value = avg)
```
```{r}
# Standard deviation of `value` for every comparison, shown with one column
# per measurement type.
df %>%
  group_by(comparison, measurement) %>%
  summarise(spread_sd = sd(value)) %>%
  spread(key = measurement, value = spread_sd)

```

## Distributions

```{r}
# Histogram of the differences (4 bins), one panel row per measurement type.
ggplot(df, aes(x = value)) +
  geom_histogram(bins = 4) +
  facet_grid(measurement ~ .) +
  labs(title = "Distribution of measurement", x = "Difference", y = "Count") +
  theme_minimal()
```

Boxplot
```{r}
# Side-by-side boxplots of the differences for each measurement type.
ggplot(data = df, mapping = aes(x = measurement, y = value)) +
  geom_boxplot() +
  theme_minimal()
```





```{r}
# Bar chart of the mean difference per comparison, one panel per measurement
# type. Comparisons are ordered along the x axis with fct_reorder() on `Mean`.
df %>% 
  group_by(comparison, measurement) %>% 
  summarise(Mean = mean(value)) %>% 
  ggplot(aes(x = fct_reorder(comparison, Mean), y = Mean)) + 
  geom_col() + 
  facet_grid(. ~ measurement) + 
  theme_minimal()
```
Se observan diferencias entre las mediciones intraorales y las extraorales. En general, las diferencias son menores para las extraorales.

# ¿Hay diferencias entre grupos?
Tres factores:

 - intra vs extraoral: 2 niveles
 - lugar de la medición: 60 niveles
 - comparación (1 vs 2, etc.): 9 niveles

```{r}
# Additive (main-effects only) ANOVA of the difference on measurement point,
# comparison and measurement type.
# The formula names the columns and takes them from `data = df` instead of
# embedding `df$...` vectors: the terms in summary() are then labelled with the
# plain column names, the model stays tied to its data frame (so update() and
# predict() with new data work), and the call matches the style of the second
# model fitted later in this notebook.
comparacion <- aov(
  value ~ merijumu_punkts + comparison + measurement,
  data = df
)
```

```{r}
# ANOVA table (Df, sums of squares, F statistic, p-value) for each term of
# the model fitted above.
summary(comparacion)
```

Interpretación: hay diferencias significativas entre puntos y entre intra/extra

```{r}
# Mean difference per measurement point and measurement type, drawn as one
# horizontal box per point. Each box is built from the per-type means of that
# point (one row per point/measurement combination after summarise()).
df %>% 
  group_by(merijumu_punkts, measurement) %>% 
  summarise(mean = mean(value)) %>% 
  # Disabled step: would put the measurement types side by side as columns.
  # spread(measurement, mean) %>% 
  ggplot(aes(x = fct_reorder(merijumu_punkts, mean), y = mean)) + 
  geom_boxplot() + 
  theme_minimal() +
  # Flip so the point labels are read along the vertical axis.
  coord_flip()
```

```{r}
# Bars of the raw differences per measurement point, one panel per
# measurement type. The data are not aggregated first, so every row
# contributes to the bar of its point.
# NOTE(review): geom_col() stacks by default, so bar height would be the sum
# of the values per point rather than a mean - confirm this is intended.
df %>% 
  ggplot(aes(x = fct_reorder(merijumu_punkts, value), y = value)) + 
  geom_col() + 
  # Horizontal layout currently disabled.
  #coord_flip() + 
  facet_wrap(~measurement)
  


```



-----------------------------------------
```{r}
library(tidyverse)
```

# Aira 2
```{r}
# Download the sheet with gid=20675042 from the same published Google Sheets
# workbook as CSV. Column types are guessed by readr; no `col_types` is given.
df2 <- read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vStv7Pr69DtRKv6Nw6gVBep8hbT3pEeO6B1vNwxK_1DUHgpoTgbuRpZ4SvgtHFQnBZJVGeeQVyRuXZl/pub?gid=20675042&single=true&output=csv")
```

```{r}
# Normalise the column names with janitor, then preview the first rows.
df2 <- df2 %>%
  janitor::clean_names()
df2 %>%
  head()
```


```{r}
# Reshape from wide to long: the columns x1_1 ... x1_10 are stacked, with the
# former column name stored in `comparison` and the cell content in `value`.
df2 <- gather(df2, key = "comparison", value = "value", x1_1:x1_10)
```


```{r}
# Show the rows sorted from the largest to the smallest value, to spot
# implausible entries.
arrange(df2, desc(value))
```
voy a reemplazar el extraoral = 300 por 30


```{r}
# Row position(s) of the value 300 that is about to be corrected.
which(df2$value == 300)
```
lo cambio

```{r}
# Correct the entry recorded as 300, which should be 30.
# The row is selected by its value rather than by a hard-coded position, so
# the fix still hits the right cell if the sheet gains, loses or reorders
# rows, and it does nothing once the source data has been corrected.
# which() is used because it skips NA comparisons; a bare logical index with
# NAs is not allowed in a subscripted assignment.
df2$value[which(df2$value == 300)] <- 30L
```


ordeno los factores
```{r}
# Turn `measurement` into a factor with an explicit level order, so tables
# and plots list the groups as intraoral, extraoral, intra_vs_extra.
df2 <- df2 %>%
  mutate(
    measurement = factor(
      measurement,
      levels = c("intraoral", "extraoral", "intra_vs_extra")
    )
  )
```

```{r}
# Row count per measurement level.
table(df2$measurement)
```

cambio por valores absolutos
```{r}
# Work with the magnitude of each difference, discarding its sign.
df2 <- df2 %>%
  mutate(value = abs(value))
```

## MEAN per group

```{r}
# Mean of `value` for every comparison, shown with one column per
# measurement type.
df2 %>%
  group_by(measurement, comparison) %>%
  summarise(avg = mean(value)) %>%
  spread(key = measurement, value = avg)
```
## SD
```{r}
# Standard deviation of `value` for every comparison, shown with one column
# per measurement type.
df2 %>%
  group_by(measurement, comparison) %>%
  summarise(spread_sd = sd(value)) %>%
  spread(key = measurement, value = spread_sd)
```



## Boxplot per group

### intra and extraoral

```{r}
# Boxplots of every measurement type except intra_vs_extra, with the
# individual observations overlaid as faint jittered points.
ggplot(
  data = filter(df2, measurement != "intra_vs_extra"),
  mapping = aes(x = measurement, y = value)
) +
  geom_boxplot() +
  geom_jitter(alpha = 0.05) +
  theme_minimal()
```

### intra_vs_extra

```{r}
# Boxplot of the intra_vs_extra rows only, with the individual observations
# overlaid as faint jittered points.
ggplot(
  data = filter(df2, measurement == "intra_vs_extra"),
  mapping = aes(x = measurement, y = value)
) +
  geom_boxplot() +
  geom_jitter(alpha = 0.05) +
  theme_minimal()
```

```{r}
# Histogram (5 bins) of the intra_vs_extra values.
ggplot(
  data = filter(df2, measurement == "intra_vs_extra"),
  mapping = aes(x = value)
) +
  geom_histogram(bins = 5) +
  theme_minimal()
```



```{r}
# Mean and standard deviation of the intra_vs_extra values per measurement
# point, listed from the largest mean to the smallest.
df2 %>% 
  filter(measurement == "intra_vs_extra") %>% 
  group_by(merijumu_punkts) %>% 
  summarise(mean = mean(value), sd = sd(value)) %>% 
  arrange(desc(mean))
```

# Inferential

```{r}
# Preview the long-format data before modelling.
head(df2)
```
```{r}
# Subset for the inferential part: keep only the intra_vs_extra rows and
# rebuild the `measurement` factor so the levels no longer present in the
# subset are dropped.
df2_intra_vs_extra <- df2 %>%
  filter(measurement == "intra_vs_extra") %>%
  mutate(measurement = factor(measurement))
```

```{r}
# Additive (main-effects only) ANOVA of the intra_vs_extra values on
# measurement point and comparison, followed by its ANOVA table.
model_intra_vs_extra <- aov(value ~ merijumu_punkts + comparison, data = df2_intra_vs_extra)
summary(model_intra_vs_extra)
```

```{r}
# Overall mean and standard deviation of the intra_vs_extra values; used
# below as reference lines in the per-point boxplots.
# No `na.rm` is set, so both become NA if `value` contains missing entries.
df2_mean <- mean(df2_intra_vs_extra$value)
df2_sd <- sd(df2_intra_vs_extra$value)
```

```{r}

# Horizontal boxplots of the intra_vs_extra values for every measurement
# point, ordered with fct_reorder() on `value`.
# Dashed reference lines: overall mean (dark red) and mean -/+ one standard
# deviation (dark grey).
# The template placeholder labels ("Edit For Title", ...) are replaced with
# real ones so the figure is readable in the rendered notebook.
df2_intra_vs_extra %>%
  ggplot(aes(x = fct_reorder(merijumu_punkts, value), y = value)) +
  geom_boxplot() +
  geom_hline(yintercept = df2_mean, lty = 2, color = "darkred") +
  geom_hline(yintercept = df2_mean - df2_sd, lty = 2, color = "darkgrey") +
  geom_hline(yintercept = df2_mean + df2_sd, lty = 2, color = "darkgrey") +
  theme_minimal() +
  # Flip so the point labels are read along the vertical axis.
  coord_flip() +
  labs(
    title = "Difference Between Measurement Points",
    subtitle = "Red line = mean, grey lines = mean +/- 1 SD",
    y = "Difference",
    x = "Measurement point"
  )
```
Clean the `merijumu_punkts` column: remove the spaces, then keep only the part of each label that comes before the first `L` or `V`.
```{r}
# Remove every space character from the measurement-point labels.
df2_intra_vs_extra$merijumu_punkts <- str_replace_all(
  df2_intra_vs_extra$merijumu_punkts,
  " ",
  ""
)
```


```{r}
# Check the labels after the spaces were removed.
head(df2_intra_vs_extra)
```
```{r}
# Keep only the part of each label that precedes the first "L" or "V".
# One split on the regular expression "[LV]" gives the same `merijumu_punkts`
# as splitting on "L" and then on "V", without overwriting the helper column
# between two passes.
#   extra = "drop":  silently discard anything after a second separator.
#   fill  = "right": labels without a separator get NA in `delete` without
#                    the "Expected 2 pieces" warning.
# The remainder still lands in `delete`, which is removed in a later step.
df2_intra_vs_extra <- df2_intra_vs_extra %>%
  separate(
    merijumu_punkts,
    into = c("merijumu_punkts", "delete"),
    sep = "[LV]",
    extra = "drop",
    fill = "right"
  )
```

```{r}
# Discard the helper column left over from splitting the labels.
df2_intra_vs_extra <- select(df2_intra_vs_extra, -delete)
```

```{r}
# Horizontal boxplots of the intra_vs_extra values per cleaned label, ordered
# with fct_reorder() on `value`. Dashed reference lines mark the overall mean
# (dark red) and the mean -/+ one standard deviation (dark grey).
ggplot(
  data = df2_intra_vs_extra,
  mapping = aes(x = fct_reorder(merijumu_punkts, value), y = value)
) +
  geom_boxplot() +
  geom_hline(yintercept = df2_mean, lty = 2, color = "darkred") +
  geom_hline(yintercept = df2_mean - df2_sd, lty = 2, color = "darkgrey") +
  geom_hline(yintercept = df2_mean + df2_sd, lty = 2, color = "darkgrey") +
  labs(
    title = "Difference Between Teeth",
    subtitle = "Red line = mean, grey lines = standard deviation",
    x = "Teeth",
    y = "Difference"
  ) +
  coord_flip() +
  theme_minimal()
```


