---
title: "Evaluación del módulo Bioestadística"
subtitle: "Procesamiento y Simulación de Datos Biológicos"
author: "JESUS AMADO MORINIGO SOSA"
date: "Curso 2025-2026"
urlcolor: blue
output:
  html_document:
    toc: no
---
0. Lectura de los datos
# Load the three input tables; strings stay as character vectors.
datos_clinicos <- read.csv(file = "data/datos_clinicos.csv", stringsAsFactors = FALSE)
farmacos <- read.csv(file = "data/farmacos.csv", stringsAsFactors = FALSE)
metadatos <- read.csv(file = "data/metadatos.csv", stringsAsFactors = FALSE)

# Report the dimensions (rows, columns) of each table that was read.
kable(data.frame(
  fichero = c("datos_clinicos", "farmacos", "metadatos"),
  filas = vapply(list(datos_clinicos, farmacos, metadatos), nrow, integer(1)),
  columnas = vapply(list(datos_clinicos, farmacos, metadatos), ncol, integer(1))
))
| fichero        | filas | columnas |
|:---------------|------:|---------:|
| datos_clinicos |  1888 |        4 |
| farmacos       |   249 |        2 |
| metadatos      |   249 |        4 |
1. Análisis descriptivo
# Attach the drug assignment and the mouse metadata to every clinical
# measurement, matching rows on ID.
df <- left_join(datos_clinicos, farmacos, by = "ID") %>%
  left_join(metadatos, by = "ID")

# Keep one metadata row per ID so each mouse counts once in the means below.
meta_unicos <- distinct(metadatos, ID, .keep_all = TRUE)

# Mean age and mean weight across mice, ignoring missing values.
data.frame(
  Edad_media = mean(meta_unicos$Age, na.rm = TRUE),
  Peso_medio = mean(meta_unicos$Weight, na.rm = TRUE)
) %>%
  kable(digits = 2)

# Summary statistics of the three measured variables over all records.
df %>%
  select(Dias, Volumen, Metastasis) %>%
  summary() %>%
  kable()
|   | Dias          | Volumen       | Metastasis    |
|:--|:--------------|:--------------|:--------------|
|   | Min. : 0.0    | Min. :22.05   | Min. :0.000   |
|   | 1st Qu.: 5.0  | 1st Qu.:45.00 | 1st Qu.:0.000 |
|   | Median :20.0  | Median :48.95 | Median :1.000 |
|   | Mean :19.6    | Mean :50.45   | Mean :1.023   |
|   | 3rd Qu.:30.0  | 3rd Qu.:56.32 | 3rd Qu.:2.000 |
|   | Max. :45.0    | Max. :78.57   | Max. :4.000   |
Ratones por sexo
# Collapse to one row per (ID, Sex) pair, then tally the mice by sex.
df %>%
  distinct(ID, Sex) %>%
  count(Sex) %>%
  kable()
Ratones por fármaco
# Collapse to one row per (ID, Farmaco) pair, tally the mice per drug and
# list the largest groups first.
df %>%
  distinct(ID, Farmaco) %>%
  count(Farmaco) %>%
  arrange(desc(n)) %>%
  kable()
| Farmaco   |  n |
|:----------|---:|
| Capomulin | 25 |
| Ceftamin  | 25 |
| Infubinol | 25 |
| Ketapril  | 25 |
| Naftisol  | 25 |
| Placebo   | 25 |
| Propriva  | 25 |
| Ramicane  | 25 |
| Zoniferol | 25 |
| Stelasyn  | 24 |
Ratones macho por fármaco
# One row per (ID, Farmaco, Sex) combination, keep the rows whose Sex matches
# any of the accepted spellings for "male", and tally them per drug.
df %>%
  distinct(ID, Farmaco, Sex) %>%
  filter(Sex %in% c("Male", "M", "macho", "Macho")) %>%
  count(Farmaco) %>%
  kable()
| Farmaco   |  n |
|:----------|---:|
| Capomulin | 12 |
| Ceftamin  | 12 |
| Infubinol | 13 |
| Ketapril  | 16 |
| Naftisol  | 12 |
| Placebo   | 12 |
| Propriva  | 13 |
| Ramicane  | 16 |
| Stelasyn  |  9 |
| Zoniferol | 10 |
Volumen final por fármaco
# Last available record of each mouse: within every ID keep the rows at the
# maximum value of Dias, then guarantee a single row per ID.
ultimo <- group_by(df, ID)
ultimo <- filter(ultimo, Dias == max(Dias, na.rm = TRUE))
ultimo <- distinct(ungroup(ultimo), ID, .keep_all = TRUE)

# Per-drug descriptive statistics of the final volume, ordered by the mean.
resumen_vol <- ultimo %>%
  group_by(Farmaco) %>%
  summarise(
    n       = n(),
    min     = min(Volumen),
    mediana = median(Volumen),
    media   = mean(Volumen),
    max     = max(Volumen),
    sd      = sd(Volumen),
    var     = var(Volumen),
    .groups = "drop"
  ) %>%
  arrange(media)

resumen_vol %>% kable(digits = 2)
| Farmaco   |  n |   min | mediana | media |   max |    sd |    var |
|:----------|---:|------:|--------:|------:|------:|------:|-------:|
| Ramicane  | 25 | 22.05 |   36.56 | 36.19 | 45.22 |  5.67 |  32.17 |
| Capomulin | 25 | 23.34 |   38.13 | 36.67 | 47.69 |  5.72 |  32.66 |
| Propriva  | 25 | 45.00 |   55.84 | 56.74 | 72.46 |  8.33 |  69.35 |
| Ceftamin  | 25 | 45.00 |   59.85 | 57.75 | 68.92 |  8.37 |  69.98 |
| Infubinol | 25 | 36.32 |   60.17 | 58.18 | 72.23 |  8.60 |  74.01 |
| Zoniferol | 25 | 45.00 |   61.84 | 59.18 | 73.32 |  8.77 |  76.86 |
| Placebo   | 25 | 45.00 |   62.03 | 60.51 | 73.21 |  8.87 |  78.76 |
| Stelasyn  | 24 | 45.00 |   62.19 | 61.00 | 75.12 |  9.50 |  90.33 |
| Naftisol  | 25 | 45.00 |   63.28 | 61.21 | 76.67 | 10.30 | 106.03 |
| Ketapril  | 25 | 45.00 |   64.49 | 62.81 | 78.57 |  9.95 |  98.92 |
# Horizontal boxplots of the final volume, drugs ordered by their median.
ggplot(
  data = ultimo,
  mapping = aes(
    x = reorder(Farmaco, Volumen, FUN = median),
    y = Volumen
  )
) +
  geom_boxplot() +
  coord_flip()

Variación del volumen (desde 45)
# Change in volume relative to the reference value of 45.
ultimo <- mutate(ultimo, Variacion = Volumen - 45)

# Mean and standard deviation of that change per drug, smallest mean first.
ultimo %>%
  group_by(Farmaco) %>%
  summarise(
    media_variacion = mean(Variacion),
    sd_variacion    = sd(Variacion),
    .groups         = "drop"
  ) %>%
  arrange(media_variacion) %>%
  kable(digits = 2)
| Farmaco   | media_variacion | sd_variacion |
|:----------|----------------:|-------------:|
| Ramicane  |           -8.81 |         5.67 |
| Capomulin |           -8.33 |         5.72 |
| Propriva  |           11.74 |         8.33 |
| Ceftamin  |           12.75 |         8.37 |
| Infubinol |           13.18 |         8.60 |
| Zoniferol |           14.18 |         8.77 |
| Placebo   |           15.51 |         8.87 |
| Stelasyn  |           16.00 |         9.50 |
| Naftisol  |           16.21 |        10.30 |
| Ketapril  |           17.81 |         9.95 |
Metástasis final por fármaco
# Mean, median and standard deviation of the final metastasis count per drug,
# ordered by increasing mean.
ultimo %>%
  group_by(Farmaco) %>%
  summarise(
    media   = mean(Metastasis),
    mediana = median(Metastasis),
    sd      = sd(Metastasis),
    .groups = "drop"
  ) %>%
  arrange(media) %>%
  kable(digits = 2)
| Farmaco   | media | mediana |   sd |
|:----------|------:|--------:|-----:|
| Ramicane  |  1.20 |       1 | 0.87 |
| Capomulin |  1.28 |       1 | 0.98 |
| Stelasyn  |  1.46 |       1 | 1.06 |
| Propriva  |  1.56 |       1 | 1.36 |
| Infubinol |  1.60 |       1 | 1.22 |
| Ceftamin  |  1.72 |       1 | 1.43 |
| Ketapril  |  1.92 |       2 | 1.66 |
| Naftisol  |  2.00 |       2 | 1.35 |
| Zoniferol |  2.00 |       2 | 1.50 |
| Placebo   |  2.08 |       2 | 1.50 |
# Horizontal boxplots of the final metastasis count, drugs ordered by median.
ggplot(
  data = ultimo,
  mapping = aes(
    x = reorder(Farmaco, Metastasis, FUN = median),
    y = Metastasis
  )
) +
  geom_boxplot() +
  coord_flip()

2. Subconjunto de 4 fármacos
# Restrict the final-record table to the four drugs under comparison.
subfarm <- c("Capomulin", "Infubinol", "Ketapril", "Placebo")
ultimo4 <- filter(ultimo, Farmaco %in% subfarm)

# Number of mice retained for each of the selected drugs.
ultimo4 %>%
  count(Farmaco) %>%
  kable()
| Farmaco   |  n |
|:----------|---:|
| Capomulin | 25 |
| Infubinol | 25 |
| Ketapril  | 25 |
| Placebo   | 25 |
# Mean and standard deviation of final volume and metastasis count for each
# of the four selected drugs.
ultimo4 %>%
  group_by(Farmaco) %>%
  summarise(
    media_vol = mean(Volumen),
    sd_vol    = sd(Volumen),
    media_met = mean(Metastasis),
    sd_met    = sd(Metastasis),
    .groups   = "drop"
  ) %>%
  kable(digits = 2)
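| Farmaco   | media_vol | sd_vol | media_met | sd_met |
|:----------|----------:|-------:|----------:|-------:|
| Capomulin |     36.67 |   5.72 |      1.28 |   0.98 |
| Infubinol |     58.18 |   8.60 |      1.60 |   1.22 |
| Ketapril  |     62.81 |   9.95 |      1.92 |   1.66 |
| Placebo   |     60.51 |   8.87 |      2.08 |   1.50 |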
# Side-by-side boxplots for the four selected drugs:
# final volume on the left, final metastasis count on the right.
p1 <- ggplot(data = ultimo4, mapping = aes(x = Farmaco, y = Volumen)) +
  geom_boxplot()
p2 <- ggplot(data = ultimo4, mapping = aes(x = Farmaco, y = Metastasis)) +
  geom_boxplot()
grid.arrange(p1, p2, ncol = 2)

3. ANOVA
# One-way ANOVA of the final volume across the four selected drugs.
modelo_aov <- aov(Volumen ~ Farmaco, data = ultimo4)

# ANOVA table rendered as a plain data frame, rounded to 4 digits.
modelo_aov %>%
  anova() %>%
  as.data.frame() %>%
  kable(digits = 4)
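|           | Df |    Sum Sq |   Mean Sq | F value | Pr(>F) |
|:----------|---:|----------:|----------:|--------:|-------:|
| Farmaco   |  3 | 10915.314 | 3638.4380 | 51.1816 |      0 |
| Residuals | 96 |  6824.529 |   71.0888 |      NA |     NA |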
# Tukey HSD pairwise comparisons for the Farmaco term of the ANOVA model.
TukeyHSD(modelo_aov)[["Farmaco"]] %>%
  as.data.frame() %>%
  kable(digits = 6)
|                     |      diff |       lwr |       upr |    p adj |
|:--------------------|----------:|----------:|----------:|---------:|
| Infubinol-Capomulin | 21.510678 | 15.275454 | 27.745903 | 0.000000 |
| Ketapril-Capomulin  | 26.138624 | 19.903399 | 32.373848 | 0.000000 |
| Placebo-Capomulin   | 23.840847 | 17.605622 | 30.076071 | 0.000000 |
| Ketapril-Infubinol  |  4.627946 | -1.607279 | 10.863170 | 0.218170 |
| Placebo-Infubinol   |  2.330168 | -3.905056 |  8.565393 | 0.762787 |
| Placebo-Ketapril    | -2.297777 | -8.533002 |  3.937447 | 0.770412 |
4. Correlación y Regresión
# Mean final volume per selected drug, smallest first; the first row is the
# drug treated as "best" in the rest of this section.
medias4 <- ultimo4 %>%
  group_by(Farmaco) %>%
  summarise(media = mean(Volumen), .groups = "drop") %>%
  arrange(media)
mejor <- medias4$Farmaco[[1]]

# Final records of the mice treated with that drug.
df_best <- filter(ultimo4, Farmaco == mejor)

# Correlation test (cor.test defaults) between weight and final volume.
cor_res <- cor.test(df_best$Weight, df_best$Volumen)
data.frame(
  Farmaco = mejor,
  r       = unname(cor_res$estimate),
  p_value = cor_res$p.value
) %>%
  kable(digits = 6)

# Simple linear regression of final volume on weight, with its coefficient table.
modelo_lm <- lm(Volumen ~ Weight, data = df_best)
summary(modelo_lm)$coefficients %>%
  as.data.frame() %>%
  kable(digits = 6)
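|             | Estimate | Std. Error |  t value | Pr(>\|t\|) |
|:------------|---------:|-----------:|---------:|-----------:|
| (Intercept) | 1.448147 |   4.068291 | 0.355960 |   0.725115 |
| Weight      | 1.750468 |   0.200265 | 8.740754 |   0.000000 |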
# Scatter plot of final volume against weight for the best drug, with the
# fitted regression line and its confidence band.
ggplot(data = df_best, mapping = aes(x = Weight, y = Volumen)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE)
