This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
# Attach the packages used in this section and load the birthwt dataset.
library(knitr)
library(MASS)
data("birthwt")
# View() opens a spreadsheet-style data viewer, which only makes sense in an
# interactive session; skip it when the document is rendered non-interactively.
if (interactive()) {
  View(birthwt)
}
# Number of rows and columns.
dim(birthwt)
## [1] 189 10
length(birthwt)
## [1] 10
head(birthwt, n = 5)
## low age lwt race smoke ptl ht ui ftv bwt
## 85 0 19 182 2 0 0 0 1 0 2523
## 86 0 33 155 3 0 0 0 0 3 2551
## 87 0 20 105 1 1 0 0 0 1 2557
## 88 0 21 108 1 1 0 0 1 2 2594
## 89 0 18 107 1 1 0 0 1 0 2600
names(birthwt)
## [1] "low" "age" "lwt" "race" "smoke" "ptl" "ht" "ui" "ftv"
## [10] "bwt"
# Build a small example data frame from three parallel vectors.
genero <- c(1, 2, 1, 1, 1, 2, 2, 2, 1, 2)
edad <- c(24, 25, 26, 24, 25, 27, 21, 22, 25, 26)
fuma <- c("no", "sí", "no", "sí", "no", "no", "sí", "no", "no", "sí")
MiDataf <- data.frame(genero = genero, edad = edad, fuma = fuma)
print(MiDataf)
## genero edad fuma
## 1 1 24 no
## 2 2 25 sí
## 3 1 26 no
## 4 1 24 sí
## 5 1 25 no
## 6 2 27 no
## 7 2 21 sí
## 8 2 22 no
## 9 1 25 no
## 10 2 26 sí
# Rows 3 to 5, all columns.
MiDataf[seq(3, 5), ]
## genero edad fuma
## 3 1 26 no
## 4 1 24 sí
## 5 1 25 no
# Every value of the first column.
MiDataf[, 1]
## [1] 1 2 1 1 1 2 2 2 1 2
# A single variable of the dataset, selected by name.
MiDataf[["genero"]]
## [1] 1 2 1 1 1 2 2 2 1 2
# Mean age of the patients who do not smoke.
with(MiDataf, mean(edad[fuma == "no"]))
## [1] 24.83333
# with() evaluates the block using the columns of MiDataf as variables;
# here it converts the age from years to days (years * 365).
with(MiDataf, {
  edad_dias <- edad * 365
  edad_dias
})
## [1] 8760 9125 9490 8760 9125 9855 7665 8030 9125 9490
# First data frame of diseases: one disease label per individual.
Dataf_Enf1 <- data.frame(
  enfermedad = c("diabetes", "colesterol", "hipertensión", "hipotensión"),
  individuos = paste0("ind", 1:4)
)
print(Dataf_Enf1) # show the contents of Dataf_Enf1
## enfermedad individuos
## 1 diabetes ind1
## 2 colesterol ind2
## 3 hipertensión ind3
## 4 hipotensión ind4
# Second data frame with exactly the same variables as the first one.
Dataf_Enf2 <- data.frame(
  enfermedad = c("diabetes", "colesterol", "hipertensión", "hipotensión"),
  individuos = paste0("ind", 21:24)
)
print(Dataf_Enf2) # show the contents of Dataf_Enf2
## enfermedad individuos
## 1 diabetes ind21
## 2 colesterol ind22
## 3 hipertensión ind23
## 4 hipotensión ind24
# Stack the two data frames (same columns) into a single one, row-wise.
Dataf_Enf <- rbind(Dataf_Enf1, Dataf_Enf2)
print(Dataf_Enf)
## enfermedad individuos
## 1 diabetes ind1
## 2 colesterol ind2
## 3 hipertensión ind3
## 4 hipotensión ind4
## 5 diabetes ind21
## 6 colesterol ind22
## 7 hipertensión ind23
## 8 hipotensión ind24
# First medication table: individual id, drug name and the Past_día value
# (presumably pills per day -- confirm with the exercise statement).
Indiv1 <- paste0("I", 213:217)
Medic1 <- c(
  "Paracetamol", "Ibuprofeno", "Aspirina", "Ibuprofeno", "Paracetamol"
)
Past_día <- c(2, 3, 2, 2, 2)
df_medica1 <- data.frame(Indiv1 = Indiv1, Medic1 = Medic1, Past_día = Past_día)
print(df_medica1)
## Indiv1 Medic1 Past_día
## 1 I213 Paracetamol 2
## 2 I214 Ibuprofeno 3
## 3 I215 Aspirina 2
## 4 I216 Ibuprofeno 2
## 5 I217 Paracetamol 2
# Second medication table, same layout as the first but with its own
# id/drug column names (Indiv2, Medic2); Past_día is the only shared name.
Indiv2 <- paste0("I", 213:217)
Medic2 <- c(
  "Paracetamol", "Ibuprofeno", "Aspirina", "Ibuprofeno", "Paracetamol"
)
Past_día <- c(2, 3, 2, 2, 2)
df_medica2 <- data.frame(Indiv2 = Indiv2, Medic2 = Medic2, Past_día = Past_día)
print(df_medica2)
## Indiv2 Medic2 Past_día
## 1 I213 Paracetamol 2
## 2 I214 Ibuprofeno 3
## 3 I215 Aspirina 2
## 4 I216 Ibuprofeno 2
## 5 I217 Paracetamol 2
# Past_día is the only column name the two tables share, so merge() joins on
# it: each row of one table is paired with every row of the other table that
# has the same Past_día value.
merge(x = df_medica1, y = df_medica2)
## Past_día Indiv1 Medic1 Indiv2 Medic2
## 1 2 I213 Paracetamol I213 Paracetamol
## 2 2 I213 Paracetamol I215 Aspirina
## 3 2 I213 Paracetamol I216 Ibuprofeno
## 4 2 I213 Paracetamol I217 Paracetamol
## 5 2 I215 Aspirina I213 Paracetamol
## 6 2 I215 Aspirina I215 Aspirina
## 7 2 I215 Aspirina I216 Ibuprofeno
## 8 2 I215 Aspirina I217 Paracetamol
## 9 2 I216 Ibuprofeno I213 Paracetamol
## 10 2 I216 Ibuprofeno I215 Aspirina
## 11 2 I216 Ibuprofeno I216 Ibuprofeno
## 12 2 I216 Ibuprofeno I217 Paracetamol
## 13 2 I217 Paracetamol I213 Paracetamol
## 14 2 I217 Paracetamol I215 Aspirina
## 15 2 I217 Paracetamol I216 Ibuprofeno
## 16 2 I217 Paracetamol I217 Paracetamol
## 17 3 I214 Ibuprofeno I214 Ibuprofeno
# Fix the random seed so the simulated salaries and ages are reproducible.
set.seed(999)
# Ordered id for each doctor.
médico_id <- seq_len(10)
# Name of each professional.
médico_nombre <- c(
  "Ona", "Jordi", "Oriol", "Pau", "Esther",
  "Xavi", "Jan", "Marta", "Anna", "Abril"
)
# Simulated salary (normal, mean 1500, sd 200), rounded to whole units.
médico_sal <- round(rnorm(n = 10, mean = 1500, sd = 200))
# Simulated age (normal, mean 50, sd 8), rounded to whole years.
médico_edad <- round(rnorm(n = 10, mean = 50, sd = 8))
# Speciality of each professional.
médico_espec <- c("Neuro", "Orto", "Gine", "Trauma", rep("General", times = 6))
# Table 1 holds the salary of the first eight doctors.
df_Med_1 <- data.frame(
  id = médico_id[1:8],
  nombre = médico_nombre[1:8],
  salario_mensual = médico_sal[1:8]
)
# Table 2 holds age and speciality of every doctor except the fifth.
df_Med_2 <- data.frame(
  id = médico_id[-5],
  nombre = médico_nombre[-5],
  edad = médico_edad[-5],
  position = médico_espec[-5]
)
print(df_Med_1)
## id nombre salario_mensual
## 1 1 Ona 1444
## 2 2 Jordi 1237
## 3 3 Oriol 1659
## 4 4 Pau 1554
## 5 5 Esther 1445
## 6 6 Xavi 1387
## 7 7 Jan 1124
## 8 8 Marta 1247
df_Med_2
## id nombre edad position
## 1 1 Ona 61 Neuro
## 2 2 Jordi 51 Orto
## 3 3 Oriol 58 Gine
## 4 4 Pau 51 Trauma
## 5 6 Xavi 39 General
## 6 7 Jan 51 General
## 7 8 Marta 51 General
## 8 9 Anna 57 General
## 9 10 Abril 33 General
# Inner join: only doctors present in both tables are kept. The key columns
# `id` and `nombre` are spelled out here; they are the columns the two tables
# have in common. Writing the `x =` / `y =` argument names is optional.
merge(df_Med_1, df_Med_2, by = c("id", "nombre"))
## id nombre salario_mensual edad position
## 1 1 Ona 1444 61 Neuro
## 2 2 Jordi 1237 51 Orto
## 3 3 Oriol 1659 58 Gine
## 4 4 Pau 1554 51 Trauma
## 5 6 Xavi 1387 39 General
## 6 7 Jan 1124 51 General
## 7 8 Marta 1247 51 General
# Full outer join: keep the rows of both tables and fill the columns that have
# no match with NA.
merge(df_Med_1, df_Med_2, by = c("id", "nombre"), all = TRUE)
## id nombre salario_mensual edad position
## 1 1 Ona 1444 61 Neuro
## 2 2 Jordi 1237 51 Orto
## 3 3 Oriol 1659 58 Gine
## 4 4 Pau 1554 51 Trauma
## 5 5 Esther 1445 NA <NA>
## 6 6 Xavi 1387 39 General
## 7 7 Jan 1124 51 General
## 8 8 Marta 1247 51 General
## 9 9 Anna NA 57 General
## 10 10 Abril NA 33 General
# Left join: every row of df_Med_1 is kept; unmatched columns become NA.
Df_UniIzq <- merge(df_Med_1, df_Med_2, by = c("id", "nombre"), all.x = TRUE)
print(Df_UniIzq)
## id nombre salario_mensual edad position
## 1 1 Ona 1444 61 Neuro
## 2 2 Jordi 1237 51 Orto
## 3 3 Oriol 1659 58 Gine
## 4 4 Pau 1554 51 Trauma
## 5 5 Esther 1445 NA <NA>
## 6 6 Xavi 1387 39 General
## 7 7 Jan 1124 51 General
## 8 8 Marta 1247 51 General
# Right join: every row of df_Med_2 is kept; unmatched columns become NA.
Df_UniDer <- merge(df_Med_1, df_Med_2, by = c("id", "nombre"), all.y = TRUE)
print(Df_UniDer)
## id nombre salario_mensual edad position
## 1 1 Ona 1444 61 Neuro
## 2 2 Jordi 1237 51 Orto
## 3 3 Oriol 1659 58 Gine
## 4 4 Pau 1554 51 Trauma
## 5 6 Xavi 1387 39 General
## 6 7 Jan 1124 51 General
## 7 8 Marta 1247 51 General
## 8 9 Anna NA 57 General
## 9 10 Abril NA 33 General
# Cross join: with by = NULL every row of df_Med_1 is combined with every row
# of df_Med_2, and the repeated column names get .x / .y suffixes.
Df_Cruzado <- merge(df_Med_1, df_Med_2, by = NULL)
head(Df_Cruzado, n = 6) # show only the first rows of the combination
## id.x nombre.x salario_mensual id.y nombre.y edad position
## 1 1 Ona 1444 1 Ona 61 Neuro
## 2 2 Jordi 1237 1 Ona 61 Neuro
## 3 3 Oriol 1659 1 Ona 61 Neuro
## 4 4 Pau 1554 1 Ona 61 Neuro
## 5 5 Esther 1445 1 Ona 61 Neuro
## 6 6 Xavi 1387 1 Ona 61 Neuro
# Patient table: identifier, age, sex code, weight and height.
Id <- paste0("I", 1:22)
Edad <- c(
  23, 24, 21, 22, 23, 25, 26, 24, 21, 22, 23,
  25, 26, 24, 22, 21, 25, 26, 24, 21, 25, 27
)
Sexo <- c(1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2)
Peso <- c(
  76.5, 81.2, 79.3, 59.5, 67.3, 78.6, 67.9, 100.2, 97.8, 56.4, 65.4,
  67.5, 87.4, 99.7, 87.6, 93.4, 65.4, 73.7, 85.1, 61.2, 54.8, 103.4
)
Altura <- c(
  165, 154, 178, 165, 164, 175, 182, 165, 178, 165, 158,
  183, 184, 164, 189, 167, 182, 179, 165, 158, 183, 184
)
Pacientes <- data.frame(
  Id = Id, Edad = Edad, Sexo = Sexo, Peso = Peso, Altura = Altura
)
# Keep only the identifier, age and sex columns.
Prueba1 <- Pacientes[, c("Id", "Edad", "Sexo")]
print(Prueba1)
## Id Edad Sexo
## 1 I1 23 1
## 2 I2 24 2
## 3 I3 21 1
## 4 I4 22 1
## 5 I5 23 1
## 6 I6 25 2
## 7 I7 26 2
## 8 I8 24 2
## 9 I9 21 1
## 10 I10 22 2
## 11 I11 23 1
## 12 I12 25 2
## 13 I13 26 2
## 14 I14 24 2
## 15 I15 22 1
## 16 I16 21 1
## 17 I17 25 1
## 18 I18 26 2
## 19 I19 24 2
## 20 I20 21 2
## 21 I21 25 1
## 22 I22 27 2
# Part b: patients aged 24 or older, all columns.
Prueba2 <- subset(Pacientes, subset = Edad >= 24)
print(Prueba2)
## Id Edad Sexo Peso Altura
## 2 I2 24 2 81.2 154
## 6 I6 25 2 78.6 175
## 7 I7 26 2 67.9 182
## 8 I8 24 2 100.2 165
## 12 I12 25 2 67.5 183
## 13 I13 26 2 87.4 184
## 14 I14 24 2 99.7 164
## 17 I17 25 1 65.4 182
## 18 I18 26 2 73.7 179
## 19 I19 24 2 85.1 165
## 21 I21 25 1 54.8 183
## 22 I22 27 2 103.4 184
# Part c: patients younger than 25, with the Sexo column dropped.
Prueba3 <- subset(Pacientes, subset = Edad < 25, select = -Sexo)
print(Prueba3)
## Id Edad Peso Altura
## 1 I1 23 76.5 165
## 2 I2 24 81.2 154
## 3 I3 21 79.3 178
## 4 I4 22 59.5 165
## 5 I5 23 67.3 164
## 8 I8 24 100.2 165
## 9 I9 21 97.8 178
## 10 I10 22 56.4 165
## 11 I11 23 65.4 158
## 14 I14 24 99.7 164
## 15 I15 22 87.6 189
## 16 I16 21 93.4 167
## 19 I19 24 85.1 165
## 20 I20 21 61.2 158
# Part d: patients whose height is at most 165 or above 175, i.e. everyone
# outside the (165, 175] interval.
Prueba4 <- subset(Pacientes, subset = !(Altura > 165 & Altura <= 175))
print(Prueba4)
## Id Edad Sexo Peso Altura
## 1 I1 23 1 76.5 165
## 2 I2 24 2 81.2 154
## 3 I3 21 1 79.3 178
## 4 I4 22 1 59.5 165
## 5 I5 23 1 67.3 164
## 7 I7 26 2 67.9 182
## 8 I8 24 2 100.2 165
## 9 I9 21 1 97.8 178
## 10 I10 22 2 56.4 165
## 11 I11 23 1 65.4 158
## 12 I12 25 2 67.5 183
## 13 I13 26 2 87.4 184
## 14 I14 24 2 99.7 164
## 15 I15 22 1 87.6 189
## 17 I17 25 1 65.4 182
## 18 I18 26 2 73.7 179
## 19 I19 24 2 85.1 165
## 20 I20 21 2 61.2 158
## 21 I21 25 1 54.8 183
## 22 I22 27 2 103.4 184
# Draw a reproducible random sample of rows from Pacientes.
set.seed(999)
f <- nrow(Pacientes) # size of the data frame (number of rows)
n <- 3 # size of the new random sample
# Row positions of the sampled observations, drawn without replacement.
i <- sample(seq_len(f), size = n, replace = FALSE)
Prueba5 <- Pacientes[i, ] # the new random sample
print(Prueba5)
## Id Edad Sexo Peso Altura
## 4 I4 22 1 59.5 165
## 7 I7 26 2 67.9 182
## 9 I9 21 1 97.8 178
library(dplyr)
data(women)
# Keep the rows of `women` whose height is above 58; the dplyr:: prefix makes
# explicit which filter() is meant.
Prueba6 <- dplyr::filter(women, height > 58)
print(Prueba6)
## height weight
## 1 59 117
## 2 60 120
## 3 61 123
## 4 62 126
## 5 63 129
## 6 64 132
## 7 65 135
## 8 66 139
## 9 67 142
## 10 68 146
## 11 69 150
## 12 70 154
## 13 71 159
## 14 72 164
# GRAPHICS
# Scatterplot matrix of all the iris columns.
plot(iris)
# Histogram of sepal length with the default settings.
hist(iris$Sepal.Length)
# Histogram with explicit title, bin edges, axis labels and limits, and blue bars.
hist(
  iris$Sepal.Length,
  breaks = seq(4, 8, by = 1),
  main = "Histograma de longitud del sépalo",
  xlab = "cm",
  ylab = "Frecuencia",
  xlim = c(2, 10),
  ylim = c(0, 60),
  col = "blue"
)
# Box plot of every column of iris.
boxplot(iris)
# Box plots of the first four columns only (the fifth is left out), one colour each.
boxplot(
  iris[, 1:4],
  main = "Diagramas de caja",
  xlab = "Dimensiones",
  ylab = "cm",
  col = c("red", "blue", "orange", "yellow")
)
#EJERCICIO_1
library(MASS)
library(survival)
packageDescription("MASS")
## Package: MASS
## Priority: recommended
## Version: 7.3-61
## Date: 2024-06-10
## Revision: $Rev: 3657 $
## Depends: R (>= 4.4.0), grDevices, graphics, stats, utils
## Imports: methods
## Suggests: lattice, nlme, nnet, survival
## Authors@R: c(person("Brian", "Ripley", role = c("aut", "cre", "cph"),
## email = "ripley@stats.ox.ac.uk"), person("Bill", "Venables",
## role = c("aut", "cph")), person(c("Douglas", "M."), "Bates",
## role = "ctb"), person("Kurt", "Hornik", role = "trl", comment =
## "partial port ca 1998"), person("Albrecht", "Gebhardt", role =
## "trl", comment = "partial port ca 1998"), person("David",
## "Firth", role = "ctb", comment = "support functions for polr"))
## Description: Functions and datasets to support Venables and Ripley,
## "Modern Applied Statistics with S" (4th edition, 2002).
## Title: Support Functions and Datasets for Venables and Ripley's MASS
## LazyData: yes
## ByteCompile: yes
## License: GPL-2 | GPL-3
## URL: http://www.stats.ox.ac.uk/pub/MASS4/
## Contact: <MASS@stats.ox.ac.uk>
## NeedsCompilation: yes
## Packaged: 2024-06-13 08:23:32 UTC; ripley
## Author: Brian Ripley [aut, cre, cph], Bill Venables [aut, cph], Douglas
## M. Bates [ctb], Kurt Hornik [trl] (partial port ca 1998),
## Albrecht Gebhardt [trl] (partial port ca 1998), David Firth
## [ctb] (support functions for polr)
## Maintainer: Brian Ripley <ripley@stats.ox.ac.uk>
## Repository: CRAN
## Date/Publication: 2024-06-13 10:23:32
## Built: R 4.4.2; aarch64-apple-darwin20; 2024-11-01 00:59:20 UTC; unix
##
## -- File: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/MASS/Meta/package.rds
packageDescription("survival")
## Title: Survival Analysis
## Priority: recommended
## Package: survival
## Version: 3.7-0
## Date: 2024-06-01
## Depends: R (>= 3.5.0)
## Imports: graphics, Matrix, methods, splines, stats, utils
## LazyData: Yes
## LazyDataCompression: xz
## ByteCompile: Yes
## Authors@R: c(person(c("Terry", "M"), "Therneau",
## email="therneau.terry@mayo.edu", role=c("aut", "cre")),
## person("Thomas", "Lumley", role=c("ctb", "trl"),
## comment="original S->R port and R maintainer until 2009"),
## person("Atkinson", "Elizabeth", role="ctb"), person("Crowson",
## "Cynthia", role="ctb"))
## Description: Contains the core survival analysis routines, including
## definition of Surv objects, Kaplan-Meier and Aalen-Johansen
## (multi-state) curves, Cox models, and parametric accelerated
## failure time models.
## License: LGPL (>= 2)
## URL: https://github.com/therneau/survival
## NeedsCompilation: yes
## Packaged: 2024-06-03 15:17:04 UTC; therneau
## Author: Terry M Therneau [aut, cre], Thomas Lumley [ctb, trl] (original
## S->R port and R maintainer until 2009), Atkinson Elizabeth
## [ctb], Crowson Cynthia [ctb]
## Maintainer: Terry M Therneau <therneau.terry@mayo.edu>
## Repository: CRAN
## Date/Publication: 2024-06-05 16:30:02 UTC
## Built: R 4.4.2; aarch64-apple-darwin20; 2024-11-01 00:59:54 UTC; unix
##
## -- File: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/survival/Meta/package.rds
# Open the package-level help index of MASS and of survival.
help(package = "MASS")
help(package = "survival")
# Search the installed documentation for the term "Rcmdr".
help.search("Rcmdr")
# Attach the Rcmdr package and print the metadata from its DESCRIPTION file.
library(Rcmdr)
packageDescription("Rcmdr")
## Package: Rcmdr
## Version: 2.9-5
## Date: 2024-10-23
## Title: R Commander
## Authors@R: c(person("John", "Fox", role = c("aut", "cre"), email =
## "jfox@mcmaster.ca"), person("Milan", "Bouchet-Valat", role =
## "aut"), person("Manuel", "Munoz Marquez", role = "aut"),
## person("Liviu", "Andronic", role = "ctb"), person("Michael",
## "Ash", role = "ctb"), person("Theophilius", "Boye", role =
## "ctb"), person("Stefano", "Calza", role = "ctb"),
## person("Andy", "Chang", role = "ctb"), person("Vilmantas",
## "Gegzna", role = "ctb"), person("Philippe", "Grosjean", role =
## "ctb"), person("Richard", "Heiberger", role = "ctb"),
## person("Yoshinobu", "Kanda", role = "ctb"), person("Kosar",
## "Karimi Pour", role = "ctb"), person("G. Jay", "Kerns", role =
## "ctb"), person("Renaud", "Lancelot", role = "ctb"),
## person("Matthieu", "Lesnoff", role = "ctb"), person("Uwe",
## "Ligges", role = "ctb"), person("Samir", "Messad", role =
## "ctb"), person("Martin", "Maechler", role = "ctb"),
## person("Robert", "Muenchen", role = "ctb"), person("Duncan",
## "Murdoch", role = "ctb"), person("Erich", "Neuwirth", role =
## "ctb"), person("Dan", "Putler", role = "ctb"), person("Brian",
## "Ripley", role = "ctb"), person("Miroslav", "Ristic", role =
## "ctb"), person("Peter", "Wolf", role = "ctb"), person("Kevin",
## "Wright", role="ctb") )
## Depends: R (>= 3.5.0), grDevices, graphics, methods, stats, utils,
## splines, RcmdrMisc (>= 2.9-1), car (>= 3.1-0), effects (>=
## 4.0-3)
## Imports: tcltk, tcltk2 (>= 1.2-6), abind, relimp (>= 1.0-5), lme4,
## tools
## Suggests: aplpack, boot, colorspace, e1071, foreign, grid, Hmisc,
## knitr, lattice, leaps, lmtest, markdown, MASS, mgcv, multcomp
## (>= 0.991-2), nlme, nnet, nortest, readxl, rgl (>= 0.110.2),
## rmarkdown (>= 0.9.5), sem (>= 2.1-1)
## ByteCompile: yes
## Description: A platform-independent basic-statistics GUI (graphical
## user interface) for R, based on the tcltk package.
## License: GPL (>= 2)
## URL: https://github.com/RCmdr-Project/rcmdr, https://www.r-project.org,
## https://www.john-fox.ca/RCommander/index.html
## NeedsCompilation: no
## Packaged: 2024-10-24 00:46:34 UTC; johnfox
## Author: John Fox [aut, cre], Milan Bouchet-Valat [aut], Manuel Munoz
## Marquez [aut], Liviu Andronic [ctb], Michael Ash [ctb],
## Theophilius Boye [ctb], Stefano Calza [ctb], Andy Chang [ctb],
## Vilmantas Gegzna [ctb], Philippe Grosjean [ctb], Richard
## Heiberger [ctb], Yoshinobu Kanda [ctb], Kosar Karimi Pour
## [ctb], G. Jay Kerns [ctb], Renaud Lancelot [ctb], Matthieu
## Lesnoff [ctb], Uwe Ligges [ctb], Samir Messad [ctb], Martin
## Maechler [ctb], Robert Muenchen [ctb], Duncan Murdoch [ctb],
## Erich Neuwirth [ctb], Dan Putler [ctb], Brian Ripley [ctb],
## Miroslav Ristic [ctb], Peter Wolf [ctb], Kevin Wright [ctb]
## Maintainer: John Fox <jfox@mcmaster.ca>
## Repository: CRAN
## Date/Publication: 2024-10-24 17:10:13 UTC
## Built: R 4.4.1; ; 2024-10-24 18:20:33 UTC; unix
##
## -- File: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcmdr/Meta/package.rds
# EXERCISE 2
# Folder where the files created in this exercise are stored. Full paths are
# built with file.path() instead of calling setwd(), so the working directory
# of the session is never changed by this chunk.
dir_pec1 <- "~/Library/Mobile Documents/com~apple~CloudDocs/3. Bioinformatics/UOC_MSc/2025_Software_análisis_datos/PEC_1"
ruta_txt <- file.path(dir_pec1, "datos.txt")
ruta_csv <- file.path(dir_pec1, "datos.csv")
# Create a small dataset
datos <- data.frame(
  Nombre = c("Ana", "Luis", "María"),
  Edad = c(25, 30, 28),
  Peso = c(58, 70, 62),
  Altura = c(165, 180, 170)
)
# Save it as tab-separated text and as CSV, without the row names
write.table(datos, ruta_txt, sep = "\t", row.names = FALSE)
write.csv(datos, ruta_csv, row.names = FALSE)
# Read both files back
datos_txt <- read.table(ruta_txt, header = TRUE, sep = "\t")
datos_csv <- read.csv(ruta_csv, header = TRUE)
# Summary statistics of the numeric columns read from the text file
summary(datos_txt[, c("Edad", "Peso", "Altura")])
## Edad Peso Altura
## Min. :25.00 Min. :58.00 Min. :165.0
## 1st Qu.:26.50 1st Qu.:60.00 1st Qu.:167.5
## Median :28.00 Median :62.00 Median :170.0
## Mean :27.67 Mean :63.33 Mean :171.7
## 3rd Qu.:29.00 3rd Qu.:66.00 3rd Qu.:175.0
## Max. :30.00 Max. :70.00 Max. :180.0
# Tukey five-number summary of the Edad column read from the CSV file.
fivenum(datos_csv[["Edad"]])
## [1] 25.0 26.5 28.0 29.0 30.0
# Same summary for the Peso column.
fivenum(datos_csv[["Peso"]])
## [1] 58 60 62 66 70
#EJERCICIO_3
library(MASS)
data("anorexia")
head(anorexia)
## Treat Prewt Postwt
## 1 Cont 80.7 80.2
## 2 Cont 89.4 80.1
## 3 Cont 91.8 86.4
## 4 Cont 74.0 86.3
## 5 Cont 78.1 76.1
## 6 Cont 88.3 78.1
str(anorexia)
## 'data.frame': 72 obs. of 3 variables:
## $ Treat : Factor w/ 3 levels "CBT","Cont","FT": 2 2 2 2 2 2 2 2 2 2 ...
## $ Prewt : num 80.7 89.4 91.8 74 78.1 88.3 87.3 75.1 80.6 78.4 ...
## $ Postwt: num 80.2 80.1 86.4 86.3 76.1 78.1 75.1 86.7 73.5 84.6 ...
# TRUE if the data frame contains at least one NA.
anyNA(anorexia)
## [1] FALSE
# Number of NA values in each column.
colSums(is.na(anorexia))
## Treat Prewt Postwt
## 0 0 0
# TRUE if any column is NULL. vapply() pins the per-column result to a single
# logical, so the value passed to any() always has a known type.
any(vapply(anorexia, is.null, logical(1)))
## [1] FALSE
# Rename the three treatment labels directly on the factor levels; the
# observations keep their group, only the label text changes.
levels(anorexia$Treat)[levels(anorexia$Treat) == "CBT"] <- "Cogn Beh Tr"
levels(anorexia$Treat)[levels(anorexia$Treat) == "Cont"] <- "Contr"
levels(anorexia$Treat)[levels(anorexia$Treat) == "FT"] <- "Fam Tr"
# Count the observations under each new label.
table(anorexia$Treat)
##
## Cogn Beh Tr Contr Fam Tr
## 29 26 17
head(anorexia)
## Treat Prewt Postwt
## 1 Contr 80.7 80.2
## 2 Contr 89.4 80.1
## 3 Contr 91.8 86.4
## 4 Contr 74.0 86.3
## 5 Contr 78.1 76.1
## 6 Contr 88.3 78.1
# EXERCISE 4
library(MASS)
data("biopsy")
# Preview the dataset itself. Passing the quoted name, head("biopsy"), only
# echoes the character string "biopsy".
head(biopsy)
## ID V1 V2 V3 V4 V5 V6 V7 V8 V9 class
## 1 1000025 5 1 1 1 2 1 3 1 1 benign
## 2 1002945 5 4 4 5 7 10 3 2 1 benign
## 3 1015425 3 1 1 1 2 2 3 1 1 benign
## 4 1016277 6 8 8 1 3 4 3 7 1 benign
## 5 1017023 4 1 1 3 2 1 3 1 1 benign
## 6 1017122 8 10 10 8 7 10 9 7 1 malignant
# Folder that receives the exported files; written once and reused below.
dir_pec1 <- "~/Library/Mobile Documents/com~apple~CloudDocs/3. Bioinformatics/UOC_MSc/2025_Software_análisis_datos/PEC_1"
# Export biopsy as CSV.
write.csv(biopsy, file = file.path(dir_pec1, "biopsy.csv"))
data("Melanoma")
# Export Melanoma both as CSV and as a whitespace-separated text table.
write.csv(Melanoma, file = file.path(dir_pec1, "melanoma.csv"))
write.table(Melanoma, file.path(dir_pec1, "melanoma.txt"))
#EJERCICIO_5
library(MASS)
data("birthwt")
head(birthwt)
## low age lwt race smoke ptl ht ui ftv bwt
## 85 0 19 182 2 0 0 0 1 0 2523
## 86 0 33 155 3 0 0 0 0 3 2551
## 87 0 20 105 1 1 0 0 0 1 2557
## 88 0 21 108 1 1 0 0 1 2 2594
## 89 0 18 107 1 1 0 0 1 0 2600
## 91 0 21 124 3 0 0 0 0 0 2622
# Highest value of the age column (the wrapping parentheses also print it).
(max_age <- max(birthwt[["age"]]))
## [1] 45
# Lowest value of the age column.
(min_age <- min(birthwt[["age"]]))
## [1] 14
# Both extremes at once.
(range_age <- range(birthwt[["age"]]))
## [1] 14 45
# Full record of the observation with the lowest bwt value.
indice_minpeso <- which.min(birthwt[["bwt"]])
birthwt[indice_minpeso, ]
## low age lwt race smoke ptl ht ui ftv bwt
## 4 1 28 120 3 1 1 0 1 0 709
# Position (row index) of the oldest mother.
which.max(birthwt[["age"]])
## [1] 130
# Birth weight, in grams, of that mother's baby.
with(birthwt, bwt[which.max(age)])
## [1] 4990
# Birth weights of the observations whose ftv value is below 2.
with(birthwt, bwt[ftv < 2])
## [1] 2523 2557 2600 2622 2637 2637 2663 2665 2722 2733 2751 2769 2769 2778 2807
## [16] 2821 2836 2863 2877 2906 2920 2920 2920 2948 2948 2977 2977 2922 3033 3062
## [31] 3062 3062 3062 3090 3090 3100 3104 3132 3175 3175 3203 3203 3203 3225 3225
## [46] 3232 3234 3260 3274 3317 3317 3331 3374 3374 3402 3416 3444 3459 3460 3473
## [61] 3544 3487 3544 3572 3572 3586 3600 3614 3614 3629 3637 3643 3651 3651 3651
## [76] 3651 3699 3728 3756 3770 3770 3770 3790 3799 3827 3884 3912 3940 3941 3941
## [91] 3969 3997 3997 4054 4054 4111 4174 4238 4593 4990 709 1135 1330 1474 1588
## [106] 1588 1701 1729 1790 1818 1885 1893 1899 1928 1936 1970 2055 2055 2084 2084
## [121] 2100 2125 2187 2187 2211 2225 2240 2240 2282 2296 2296 2325 2353 2353 2367
## [136] 2381 2381 2381 2410 2410 2410 2424 2442 2466 2466 2495 2495
#EJERCICIO_6
#A partir del conjunto de datos anorexia trabajado en apartados anteriores, cread una matriz que tenga como columnas los valores de Prewt y Postwt, y cada fila sean los valores correspondientes para cada posición.
library(MASS)
data("anorexia")
head(anorexia)
## Treat Prewt Postwt
## 1 Cont 80.7 80.2
## 2 Cont 89.4 80.1
## 3 Cont 91.8 86.4
## 4 Cont 74.0 86.3
## 5 Cont 78.1 76.1
## 6 Cont 88.3 78.1
# cbind() places the two weight vectors side by side as the columns of a
# matrix, one row per observation.
matriz_pesos <- cbind(anorexia[["Prewt"]], anorexia[["Postwt"]])
head(matriz_pesos)
## [,1] [,2]
## [1,] 80.7 80.2
## [2,] 89.4 80.1
## [3,] 91.8 86.4
## [4,] 74.0 86.3
## [5,] 78.1 76.1
## [6,] 88.3 78.1
# Label the two columns (rows stay unnamed) and preview the matrix again.
dimnames(matriz_pesos) <- list(NULL, c("Prewt", "Postwt"))
head(matriz_pesos)
## Prewt Postwt
## [1,] 80.7 80.2
## [2,] 89.4 80.1
## [3,] 91.8 86.4
## [4,] 74.0 86.3
## [5,] 78.1 76.1
## [6,] 88.3 78.1
is.matrix(matriz_pesos)
## [1] TRUE
# EXERCISE 7
# Identifier of each of the 25 individuals.
Identificador <- paste0("I", 1:25)
Edad <- c(
  23, 24, 21, 22, 23, 25, 26, 24, 21, 22, 23, 25, 26,
  24, 22, 21, 25, 26, 24, 21, 25, 27, 26, 22, 29
)
# Sex code: 1 for women and 2 for men.
Sexo <- c(
  1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2,
  2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2
)
Peso <- c(
  76.5, 81.2, 79.3, 59.5, 67.3, 78.6, 67.9, 100.2, 97.8, 56.4, 65.4, 67.5, 87.4,
  99.7, 87.6, 93.4, 65.4, 73.7, 85.1, 61.2, 54.8, 103.4, 65.8, 71.7, 85.0
)
# Height in cm.
Alt <- c(
  165, 154, 178, 165, 164, 175, 182, 165, 178, 165, 158, 183, 184,
  164, 189, 167, 182, 179, 165, 158, 183, 184, 189, 166, 175
)
# Smoking status of each individual.
Fuma <- c(
  "SÍ", "NO", "SÍ", "SÍ", "NO", "NO", "NO", "SÍ", "SÍ", "SÍ", "NO", "NO", "SÍ",
  "SÍ", "SÍ", "SÍ", "NO", "NO", "SÍ", "SÍ", "SÍ", "NO", "SÍ", "NO", "SÍ"
)
# Assemble the six vectors into one data frame, one row per individual.
Trat_Pulmon <- data.frame(
  Identificador = Identificador,
  Edad = Edad,
  Sexo = Sexo,
  Peso = Peso,
  Alt = Alt,
  Fuma = Fuma
)
print(Trat_Pulmon)
## Identificador Edad Sexo Peso Alt Fuma
## 1 I1 23 1 76.5 165 SÍ
## 2 I2 24 2 81.2 154 NO
## 3 I3 21 1 79.3 178 SÍ
## 4 I4 22 1 59.5 165 SÍ
## 5 I5 23 1 67.3 164 NO
## 6 I6 25 2 78.6 175 NO
## 7 I7 26 2 67.9 182 NO
## 8 I8 24 2 100.2 165 SÍ
## 9 I9 21 1 97.8 178 SÍ
## 10 I10 22 2 56.4 165 SÍ
## 11 I11 23 1 65.4 158 NO
## 12 I12 25 2 67.5 183 NO
## 13 I13 26 2 87.4 184 SÍ
## 14 I14 24 2 99.7 164 SÍ
## 15 I15 22 1 87.6 189 SÍ
## 16 I16 21 1 93.4 167 SÍ
## 17 I17 25 1 65.4 182 NO
## 18 I18 26 2 73.7 179 NO
## 19 I19 24 2 85.1 165 SÍ
## 20 I20 21 2 61.2 158 SÍ
## 21 I21 25 1 54.8 183 SÍ
## 22 I22 27 2 103.4 184 NO
## 23 I23 26 1 65.8 189 SÍ
## 24 I24 22 1 71.7 166 NO
## 25 I25 29 2 85.0 175 SÍ
# Rows of the individuals older than 22, all columns.
Trat_Pulmon[Trat_Pulmon[["Edad"]] > 22, ]
## Identificador Edad Sexo Peso Alt Fuma
## 1 I1 23 1 76.5 165 SÍ
## 2 I2 24 2 81.2 154 NO
## 5 I5 23 1 67.3 164 NO
## 6 I6 25 2 78.6 175 NO
## 7 I7 26 2 67.9 182 NO
## 8 I8 24 2 100.2 165 SÍ
## 11 I11 23 1 65.4 158 NO
## 12 I12 25 2 67.5 183 NO
## 13 I13 26 2 87.4 184 SÍ
## 14 I14 24 2 99.7 164 SÍ
## 17 I17 25 1 65.4 182 NO
## 18 I18 26 2 73.7 179 NO
## 19 I19 24 2 85.1 165 SÍ
## 21 I21 25 1 54.8 183 SÍ
## 22 I22 27 2 103.4 184 NO
## 23 I23 26 1 65.8 189 SÍ
## 25 I25 29 2 85.0 175 SÍ
Trat_Pulmon[3, 4]
## [1] 79.3
names(Trat_Pulmon)
## [1] "Identificador" "Edad" "Sexo" "Peso"
## [5] "Alt" "Fuma"
# subset() keeps every row with age below 27 and drops the Alt column.
subset(Trat_Pulmon, subset = Edad < 27, select = -c(Alt))
## Identificador Edad Sexo Peso Fuma
## 1 I1 23 1 76.5 SÍ
## 2 I2 24 2 81.2 NO
## 3 I3 21 1 79.3 SÍ
## 4 I4 22 1 59.5 SÍ
## 5 I5 23 1 67.3 NO
## 6 I6 25 2 78.6 NO
## 7 I7 26 2 67.9 NO
## 8 I8 24 2 100.2 SÍ
## 9 I9 21 1 97.8 SÍ
## 10 I10 22 2 56.4 SÍ
## 11 I11 23 1 65.4 NO
## 12 I12 25 2 67.5 NO
## 13 I13 26 2 87.4 SÍ
## 14 I14 24 2 99.7 SÍ
## 15 I15 22 1 87.6 SÍ
## 16 I16 21 1 93.4 SÍ
## 17 I17 25 1 65.4 NO
## 18 I18 26 2 73.7 NO
## 19 I19 24 2 85.1 SÍ
## 20 I20 21 2 61.2 SÍ
## 21 I21 25 1 54.8 SÍ
## 23 I23 26 1 65.8 SÍ
## 24 I24 22 1 71.7 NO
#EJERCICIO_8
# 1. Cargar dataset
data("ChickWeight")
head(ChickWeight)
## Grouped Data: weight ~ Time | Chick
## weight Time Chick Diet
## 1 42 0 1 1
## 2 51 2 1 1
## 3 59 4 1 1
## 4 64 6 1 1
## 5 76 8 1 1
## 6 93 10 1 1
str(ChickWeight)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 578 obs. of 4 variables:
## $ weight: num 42 51 59 64 76 93 106 125 149 171 ...
## $ Time : num 0 2 4 6 8 10 12 14 16 18 ...
## $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
## $ Diet : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "formula")=Class 'formula' language weight ~ Time | Chick
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "outer")=Class 'formula' language ~Diet
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "labels")=List of 2
## ..$ x: chr "Time"
## ..$ y: chr "Body weight"
## - attr(*, "units")=List of 2
## ..$ x: chr "(days)"
## ..$ y: chr "(gm)"
summary(ChickWeight)
## weight Time Chick Diet
## Min. : 35.0 Min. : 0.00 13 : 12 1:220
## 1st Qu.: 63.0 1st Qu.: 4.00 9 : 12 2:120
## Median :103.0 Median :10.00 20 : 12 3:120
## Mean :121.8 Mean :10.72 10 : 12 4:118
## 3rd Qu.:163.8 3rd Qu.:16.00 17 : 12
## Max. :373.0 Max. :21.00 19 : 12
## (Other):506
# 2. Scatter plot of weight
# plot() with a single numeric vector draws each value against its position in
# that vector, so the x axis is the observation (row) index rather than a chick
# identifier: the dataset has 578 rows for 50 chicks. The axis label says so.
plot(ChickWeight$weight,
  main = "Gráfico de dispersión del peso de los pollitos",
  xlab = "Índice de la observación",
  ylab = "Peso (g)",
  pch = 19, col = "blue"
)
# 3. Box plot of Time
boxplot(ChickWeight$Time,
  main = "Diagrama de caja de Time",
  ylab = "Tiempo (días)",
  col = "lightgreen"
)
# EXERCISE 9
library(MASS)
# Load the packaged copy of the dataset; the filter in step 3 relies on the
# original "Cont" label.
data("anorexia")
# 1. Weight change per individual (Postwt minus Prewt), stored with its treatment
peso_ganado <- anorexia$Postwt - anorexia$Prewt
anorexia_treat_df <- data.frame(
  Treat = anorexia$Treat,
  PesoGanado = peso_ganado
)
# 2. Keep the individuals whose weight increased
anorexia_ganaron <- anorexia_treat_df[anorexia_treat_df$PesoGanado > 0, ]
# 3. Of those, keep only the ones whose treatment is "Cont"
anorexia_treat_C_df <- anorexia_ganaron[anorexia_ganaron$Treat == "Cont", ]
# 4. Show the results
head(anorexia_treat_df)
## Treat PesoGanado
## 1 Cont -0.5
## 2 Cont -9.3
## 3 Cont -5.4
## 4 Cont 12.3
## 5 Cont -2.0
## 6 Cont -10.2
head(anorexia_ganaron)
## Treat PesoGanado
## 4 Cont 12.3
## 8 Cont 11.6
## 10 Cont 6.2
## 13 Cont 8.3
## 14 Cont 3.3
## 15 Cont 11.3
anorexia_treat_C_df
## Treat PesoGanado
## 4 Cont 12.3
## 8 Cont 11.6
## 10 Cont 6.2
## 13 Cont 8.3
## 14 Cont 3.3
## 15 Cont 11.3
## 21 Cont 2.8
## 22 Cont 0.3
## 23 Cont 1.8
## 24 Cont 3.7
## 25 Cont 15.9
#EJERCICIO_10
library(knitr)
# Practical case, LAB 1
# Fix the seed so the simulated dataset is reproducible.
set.seed(999)
# a) Build the dataset: 30 simulated individuals
Id <- paste0("P", seq_len(30))
# Ages drawn with replacement from 20 to 50.
Edad <- sample(20:50, size = 30, replace = TRUE)
# Gene code: the first 15 individuals get 1, the last 15 get 2.
Gene <- rep(c(1, 2), each = 15)
# Treatment drawn with replacement from A, B and C, stored as a factor.
Trat <- factor(sample(c("A", "B", "C"), size = 30, replace = TRUE))
# Normal draws rounded to one decimal (mean 70, sd 10 and mean 170, sd 10).
Peso <- round(rnorm(30, mean = 70, sd = 10), digits = 1)
Alt <- round(rnorm(30, mean = 170, sd = 10), digits = 1)
Datos <- data.frame(
  Id = Id, Edad = Edad, Gene = Gene, Trat = Trat, Peso = Peso, Alt = Alt
)
# b) Information about the dataset
str(Datos)
## 'data.frame': 30 obs. of 6 variables:
## $ Id : chr "P1" "P2" "P3" "P4" ...
## $ Edad: int 46 23 48 26 28 33 20 45 29 41 ...
## $ Gene: num 1 1 1 1 1 1 1 1 1 1 ...
## $ Trat: Factor w/ 3 levels "A","B","C": 1 1 2 2 1 3 3 3 1 3 ...
## $ Peso: num 67.5 48.9 66.3 75.2 75.2 56 65.1 70.1 57.2 58.9 ...
## $ Alt : num 170 158 156 167 166 ...
summary(Datos)
## Id Edad Gene Trat Peso
## Length:30 Min. :20.00 Min. :1.0 A:12 Min. :48.90
## Class :character 1st Qu.:26.00 1st Qu.:1.0 B:10 1st Qu.:63.77
## Mode :character Median :33.50 Median :1.5 C: 8 Median :69.90
## Mean :34.23 Mean :1.5 Mean :68.74
## 3rd Qu.:42.00 3rd Qu.:2.0 3rd Qu.:75.00
## Max. :50.00 Max. :2.0 Max. :87.40
## Alt
## Min. :149.4
## 1st Qu.:165.8
## Median :169.6
## Mean :170.0
## 3rd Qu.:178.5
## Max. :186.6
head(Datos)
## Id Edad Gene Trat Peso Alt
## 1 P1 46 1 A 67.5 169.8
## 2 P2 23 1 A 48.9 158.5
## 3 P3 48 1 B 66.3 155.9
## 4 P4 26 1 B 75.2 167.2
## 5 P5 28 1 A 75.2 165.8
## 6 P6 33 1 C 56.0 180.0
# c) New variable IMC: Peso divided by the square of Alt / 100 (Alt is scaled
# by 100 before squaring, i.e. treated as centimetres), rounded to one decimal.
Datos$IMC <- with(Datos, round(Peso / (Alt / 100)^2, digits = 1))
head(Datos)
## Id Edad Gene Trat Peso Alt IMC
## 1 P1 46 1 A 67.5 169.8 23.4
## 2 P2 23 1 A 48.9 158.5 19.5
## 3 P3 48 1 B 66.3 155.9 27.3
## 4 P4 26 1 B 75.2 167.2 26.9
## 5 P5 28 1 A 75.2 165.8 27.4
## 6 P6 33 1 C 56.0 180.0 17.3
# d) Split the data by the Gene code (1 and 2)
Df_Mujeres <- Datos[Datos$Gene == 1, ]
Df_Hombres <- Datos[Datos$Gene == 2, ]
# e) Stack the two halves back into a single data frame
Datos_combinado <- rbind(Df_Mujeres, Df_Hombres)
head(Datos_combinado)
## Id Edad Gene Trat Peso Alt IMC
## 1 P1 46 1 A 67.5 169.8 23.4
## 2 P2 23 1 A 48.9 158.5 19.5
## 3 P3 48 1 B 66.3 155.9 27.3
## 4 P4 26 1 B 75.2 167.2 26.9
## 5 P5 28 1 A 75.2 165.8 27.4
## 6 P6 33 1 C 56.0 180.0 17.3
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.