PEC_1_Software

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

library(knitr)
library(MASS)
# Load the birthwt data set and report its dimensions.
data("birthwt")
# View() opens an interactive data viewer, so it is only called when the
# session is interactive; rendering the report non-interactively skips it.
if (interactive()) {
  View(birthwt)
}
dim(birthwt)

## [1] 189  10

length(birthwt)

## [1] 10

head(birthwt, n = 5)

##    low age lwt race smoke ptl ht ui ftv  bwt
## 85   0  19 182    2     0   0  0  1   0 2523
## 86   0  33 155    3     0   0  0  0   3 2551
## 87   0  20 105    1     1   0  0  0   1 2557
## 88   0  21 108    1     1   0  0  1   2 2594
## 89   0  18 107    1     1   0  0  1   0 2600

names(birthwt)

##  [1] "low"   "age"   "lwt"   "race"  "smoke" "ptl"   "ht"    "ui"    "ftv"  
## [10] "bwt"

# Build a small example data set from three parallel vectors of length 10.
genero <- c(1, 2, rep(1, 3), rep(2, 3), 1, 2)
edad <- c(24, 25, 26, 24, 25, 27, 21, 22, 25, 26)
fuma <- c("no", "sí", "no", "sí", "no", "no", "sí", "no", "no", "sí")
# Column names are given explicitly; they match the vector names.
MiDataf <- data.frame(genero = genero, edad = edad, fuma = fuma)
MiDataf

##    genero edad fuma
## 1       1   24   no
## 2       2   25   sí
## 3       1   26   no
## 4       1   24   sí
## 5       1   25   no
## 6       2   27   no
## 7       2   21   sí
## 8       2   22   no
## 9       1   25   no
## 10      2   26   sí

MiDataf[3:5, ]#datos de los elementos de las filas de la 3 a la 5

##   genero edad fuma
## 3      1   26   no
## 4      1   24   sí
## 5      1   25   no

MiDataf[,1] #datos de todos los elementos de la primera columna

##  [1] 1 2 1 1 1 2 2 2 1 2

MiDataf$genero #Para buscar una variable específica del conjunto de datos

##  [1] 1 2 1 1 1 2 2 2 1 2

mean(MiDataf$edad[MiDataf$fuma=="no"]) #Si por ejemplo queremos hacer la media de la edad de los pacientes que no fuman

## [1] 24.83333

# Express each age in days instead of years. with() evaluates the expression
# using the columns of MiDataf, so `edad` refers to MiDataf$edad.
with(MiDataf, edad * 365)

##  [1] 8760 9125 9490 8760 9125 9855 7665 8030 9125 9490

# First data frame of diseases: one disease per individual (ind1..ind4).
Dataf_Enf1 <- data.frame(
  enfermedad = c("diabetes", "colesterol", "hipertensión", "hipotensión"),
  individuos = paste0("ind", 1:4)
)
Dataf_Enf1  # show the contents of Dataf_Enf1

##     enfermedad individuos
## 1     diabetes       ind1
## 2   colesterol       ind2
## 3 hipertensión       ind3
## 4  hipotensión       ind4

# Second data frame with the same variables, for individuals ind21..ind24.
Dataf_Enf2 <- data.frame(
  enfermedad = c("diabetes", "colesterol", "hipertensión", "hipotensión"),
  individuos = paste0("ind", 21:24)
)
Dataf_Enf2  # show the contents of Dataf_Enf2

##     enfermedad individuos
## 1     diabetes      ind21
## 2   colesterol      ind22
## 3 hipertensión      ind23
## 4  hipotensión      ind24

#combinamos los 2 data frames en 1
Dataf_Enf = rbind(Dataf_Enf1, Dataf_Enf2)
Dataf_Enf

##     enfermedad individuos
## 1     diabetes       ind1
## 2   colesterol       ind2
## 3 hipertensión       ind3
## 4  hipotensión       ind4
## 5     diabetes      ind21
## 6   colesterol      ind22
## 7 hipertensión      ind23
## 8  hipotensión      ind24

Indiv1 <- c("I213", "I214", "I215", "I216", "I217")
Medic1 <- c("Paracetamol", "Ibuprofeno", "Aspirina", "Ibuprofeno",
"Paracetamol")
Past_día <- c(2, 3, 2, 2, 2)
df_medica1 <- data.frame (Indiv1, Medic1, Past_día)
df_medica1

##   Indiv1      Medic1 Past_día
## 1   I213 Paracetamol        2
## 2   I214  Ibuprofeno        3
## 3   I215    Aspirina        2
## 4   I216  Ibuprofeno        2
## 5   I217 Paracetamol        2

Indiv2 <- c("I213", "I214", "I215", "I216", "I217")
Medic2 <- c("Paracetamol", "Ibuprofeno", "Aspirina", "Ibuprofeno",
"Paracetamol")
Past_día <- c(2, 3, 2, 2, 2)
df_medica2 <- data.frame (Indiv2, Medic2, Past_día)
df_medica2

##   Indiv2      Medic2 Past_día
## 1   I213 Paracetamol        2
## 2   I214  Ibuprofeno        3
## 3   I215    Aspirina        2
## 4   I216  Ibuprofeno        2
## 5   I217 Paracetamol        2

# merge() without `by` joins on the column names the two data frames share.
# Here the only shared name is Past_día (the other columns are Indiv1/Medic1
# versus Indiv2/Medic2), so each row is paired with every row of the other
# frame that has the same Past_día value, not with the same individual.
merge(df_medica1, df_medica2)

##    Past_día Indiv1      Medic1 Indiv2      Medic2
## 1         2   I213 Paracetamol   I213 Paracetamol
## 2         2   I213 Paracetamol   I215    Aspirina
## 3         2   I213 Paracetamol   I216  Ibuprofeno
## 4         2   I213 Paracetamol   I217 Paracetamol
## 5         2   I215    Aspirina   I213 Paracetamol
## 6         2   I215    Aspirina   I215    Aspirina
## 7         2   I215    Aspirina   I216  Ibuprofeno
## 8         2   I215    Aspirina   I217 Paracetamol
## 9         2   I216  Ibuprofeno   I213 Paracetamol
## 10        2   I216  Ibuprofeno   I215    Aspirina
## 11        2   I216  Ibuprofeno   I216  Ibuprofeno
## 12        2   I216  Ibuprofeno   I217 Paracetamol
## 13        2   I217 Paracetamol   I213 Paracetamol
## 14        2   I217 Paracetamol   I215    Aspirina
## 15        2   I217 Paracetamol   I216  Ibuprofeno
## 16        2   I217 Paracetamol   I217 Paracetamol
## 17        3   I214  Ibuprofeno   I214  Ibuprofeno

# Simulated staff records used to demonstrate the different kinds of merge.
set.seed(999)  # fix the random seed so the simulated values are reproducible

médico_id <- seq_len(10)  # ordered id for each doctor
médico_nombre <- c(
  "Ona", "Jordi", "Oriol", "Pau", "Esther",
  "Xavi", "Jan", "Marta", "Anna", "Abril"
)  # name of each professional
# Random draws: salary first, then age (the order of the rnorm() calls matters
# for reproducing the same values from the seed).
médico_sal <- round(rnorm(n = 10, mean = 1500, sd = 200))
médico_edad <- round(rnorm(n = 10, mean = 50, sd = 8))
# Specialty of each professional: four named ones, then six "General"
médico_espec <- c("Neuro", "Orto", "Gine", "Trauma", rep("General", times = 6))

# Salary table: only the first eight doctors.
df_Med_1 <- data.frame(
  id = head(médico_id, 8),
  nombre = head(médico_nombre, 8),
  salario_mensual = head(médico_sal, 8)
)
# Age/specialty table: every doctor except the fifth.
df_Med_2 <- data.frame(
  id = médico_id[-5],
  nombre = médico_nombre[-5],
  edad = médico_edad[-5],
  position = médico_espec[-5]
)
df_Med_1

##   id nombre salario_mensual
## 1  1    Ona            1444
## 2  2  Jordi            1237
## 3  3  Oriol            1659
## 4  4    Pau            1554
## 5  5 Esther            1445
## 6  6   Xavi            1387
## 7  7    Jan            1124
## 8  8  Marta            1247

df_Med_2

##   id nombre edad position
## 1  1    Ona   61    Neuro
## 2  2  Jordi   51     Orto
## 3  3  Oriol   58     Gine
## 4  4    Pau   51   Trauma
## 5  6   Xavi   39  General
## 6  7    Jan   51  General
## 7  8  Marta   51  General
## 8  9   Anna   57  General
## 9 10  Abril   33  General

merge(x=df_Med_1, y=df_Med_2) #escribir x o y es opcional en este comando.

##   id nombre salario_mensual edad position
## 1  1    Ona            1444   61    Neuro
## 2  2  Jordi            1237   51     Orto
## 3  3  Oriol            1659   58     Gine
## 4  4    Pau            1554   51   Trauma
## 5  6   Xavi            1387   39  General
## 6  7    Jan            1124   51  General
## 7  8  Marta            1247   51  General

merge (x=df_Med_1, y=df_Med_2, all = TRUE)

##    id nombre salario_mensual edad position
## 1   1    Ona            1444   61    Neuro
## 2   2  Jordi            1237   51     Orto
## 3   3  Oriol            1659   58     Gine
## 4   4    Pau            1554   51   Trauma
## 5   5 Esther            1445   NA     <NA>
## 6   6   Xavi            1387   39  General
## 7   7    Jan            1124   51  General
## 8   8  Marta            1247   51  General
## 9   9   Anna              NA   57  General
## 10 10  Abril              NA   33  General

Df_UniIzq <- merge(x=df_Med_1, y=df_Med_2, all.x= TRUE)
Df_UniIzq

##   id nombre salario_mensual edad position
## 1  1    Ona            1444   61    Neuro
## 2  2  Jordi            1237   51     Orto
## 3  3  Oriol            1659   58     Gine
## 4  4    Pau            1554   51   Trauma
## 5  5 Esther            1445   NA     <NA>
## 6  6   Xavi            1387   39  General
## 7  7    Jan            1124   51  General
## 8  8  Marta            1247   51  General

Df_UniDer <- merge(x=df_Med_1, y=df_Med_2, all.y= TRUE)
Df_UniDer

##   id nombre salario_mensual edad position
## 1  1    Ona            1444   61    Neuro
## 2  2  Jordi            1237   51     Orto
## 3  3  Oriol            1659   58     Gine
## 4  4    Pau            1554   51   Trauma
## 5  6   Xavi            1387   39  General
## 6  7    Jan            1124   51  General
## 7  8  Marta            1247   51  General
## 8  9   Anna              NA   57  General
## 9 10  Abril              NA   33  General

Df_Cruzado <- merge (x=df_Med_1, y=df_Med_2, by=NULL)
head(Df_Cruzado) #ponemos solo las primeras filas de la combinación

##   id.x nombre.x salario_mensual id.y nombre.y edad position
## 1    1      Ona            1444    1      Ona   61    Neuro
## 2    2    Jordi            1237    1      Ona   61    Neuro
## 3    3    Oriol            1659    1      Ona   61    Neuro
## 4    4      Pau            1554    1      Ona   61    Neuro
## 5    5   Esther            1445    1      Ona   61    Neuro
## 6    6     Xavi            1387    1      Ona   61    Neuro

# Patient data set: 22 individuals identified as I1..I22.
Id <- paste0("I", 1:22)
Edad <- c(
  23, 24, 21, 22, 23, 25, 26, 24, 21, 22, 23,
  25, 26, 24, 22, 21, 25, 26, 24, 21, 25, 27
)
Sexo <- c(1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2)
Peso <- c(
  76.5, 81.2, 79.3, 59.5, 67.3, 78.6, 67.9, 100.2, 97.8, 56.4, 65.4,
  67.5, 87.4, 99.7, 87.6, 93.4, 65.4, 73.7, 85.1, 61.2, 54.8, 103.4
)
Altura <- c(
  165, 154, 178, 165, 164, 175, 182, 165, 178, 165, 158,
  183, 184, 164, 189, 167, 182, 179, 165, 158, 183, 184
)
Pacientes <- data.frame(Id, Edad, Sexo, Peso, Altura)

# Keep all rows but only the identifier, age and sex columns.
Prueba1 <- Pacientes[, c("Id", "Edad", "Sexo")]
Prueba1

##     Id Edad Sexo
## 1   I1   23    1
## 2   I2   24    2
## 3   I3   21    1
## 4   I4   22    1
## 5   I5   23    1
## 6   I6   25    2
## 7   I7   26    2
## 8   I8   24    2
## 9   I9   21    1
## 10 I10   22    2
## 11 I11   23    1
## 12 I12   25    2
## 13 I13   26    2
## 14 I14   24    2
## 15 I15   22    1
## 16 I16   21    1
## 17 I17   25    1
## 18 I18   26    2
## 19 I19   24    2
## 20 I20   21    2
## 21 I21   25    1
## 22 I22   27    2

Prueba2<- subset(Pacientes, Edad >= 24) #apartado b
Prueba2

##     Id Edad Sexo  Peso Altura
## 2   I2   24    2  81.2    154
## 6   I6   25    2  78.6    175
## 7   I7   26    2  67.9    182
## 8   I8   24    2 100.2    165
## 12 I12   25    2  67.5    183
## 13 I13   26    2  87.4    184
## 14 I14   24    2  99.7    164
## 17 I17   25    1  65.4    182
## 18 I18   26    2  73.7    179
## 19 I19   24    2  85.1    165
## 21 I21   25    1  54.8    183
## 22 I22   27    2 103.4    184

Prueba3<- subset(Pacientes, Edad < 25, select = -c(Sexo)) #apartado c
Prueba3

##     Id Edad  Peso Altura
## 1   I1   23  76.5    165
## 2   I2   24  81.2    154
## 3   I3   21  79.3    178
## 4   I4   22  59.5    165
## 5   I5   23  67.3    164
## 8   I8   24 100.2    165
## 9   I9   21  97.8    178
## 10 I10   22  56.4    165
## 11 I11   23  65.4    158
## 14 I14   24  99.7    164
## 15 I15   22  87.6    189
## 16 I16   21  93.4    167
## 19 I19   24  85.1    165
## 20 I20   21  61.2    158

Prueba4<- subset(Pacientes, Altura <= 165 | Altura > 175) #apartado d
Prueba4

##     Id Edad Sexo  Peso Altura
## 1   I1   23    1  76.5    165
## 2   I2   24    2  81.2    154
## 3   I3   21    1  79.3    178
## 4   I4   22    1  59.5    165
## 5   I5   23    1  67.3    164
## 7   I7   26    2  67.9    182
## 8   I8   24    2 100.2    165
## 9   I9   21    1  97.8    178
## 10 I10   22    2  56.4    165
## 11 I11   23    1  65.4    158
## 12 I12   25    2  67.5    183
## 13 I13   26    2  87.4    184
## 14 I14   24    2  99.7    164
## 15 I15   22    1  87.6    189
## 17 I17   25    1  65.4    182
## 18 I18   26    2  73.7    179
## 19 I19   24    2  85.1    165
## 20 I20   21    2  61.2    158
## 21 I21   25    1  54.8    183
## 22 I22   27    2 103.4    184

# Draw a random sample of rows from Pacientes without replacement.
set.seed(999)         # fixed seed so the same rows are drawn on every run
f <- nrow(Pacientes)  # f: number of rows available to sample from
n <- 3                # n: size of the new random sample
# sample.int(f, n) draws n distinct positions from 1..f. Unlike sample(1:f, n)
# it does not misbehave when f is 0 (1:0 is c(1, 0)) or when f is 1.
i <- sample.int(f, n, replace = FALSE)  # i: positions of the sampled rows
Prueba5 <- Pacientes[i, ]               # the new random sample
Prueba5

##   Id Edad Sexo Peso Altura
## 4 I4   22    1 59.5    165
## 7 I7   26    2 67.9    182
## 9 I9   21    1 97.8    178

library(dplyr) 
data(women)
Prueba6 <- filter(women, height > 58)
Prueba6

##    height weight
## 1      59    117
## 2      60    120
## 3      61    123
## 4      62    126
## 5      63    129
## 6      64    132
## 7      65    135
## 8      66    139
## 9      67    142
## 10     68    146
## 11     69    150
## 12     70    154
## 13     71    159
## 14     72    164

 #GRÁFICOS
plot(iris)

hist(iris$Sepal.Length)

hist(iris$Sepal.Length, breaks=c(4,5,6,7,8),
     main="Histograma de longitud del sépalo",
     xlab="cm", ylab="Frecuencia",
     xlim=c(2, 10), ylim=c(0, 60),
     col="blue") #creamos un histograma con títulos, intervalos, ejes y especificamos el color azul

boxplot(iris) #genero el boxplot con todas las variables

boxplot(iris[ ,-5], main="Diagramas de caja",
        xlab="Dimensiones", ylab="cm",
        col=c("red", "blue", "orange", "yellow"))

#EJERCICIO_1
library(MASS)
library(survival)
packageDescription("MASS")

## Package: MASS
## Priority: recommended
## Version: 7.3-61
## Date: 2024-06-10
## Revision: $Rev: 3657 $
## Depends: R (>= 4.4.0), grDevices, graphics, stats, utils
## Imports: methods
## Suggests: lattice, nlme, nnet, survival
## Authors@R: c(person("Brian", "Ripley", role = c("aut", "cre", "cph"),
##         email = "ripley@stats.ox.ac.uk"), person("Bill", "Venables",
##         role = c("aut", "cph")), person(c("Douglas", "M."), "Bates",
##         role = "ctb"), person("Kurt", "Hornik", role = "trl", comment =
##         "partial port ca 1998"), person("Albrecht", "Gebhardt", role =
##         "trl", comment = "partial port ca 1998"), person("David",
##         "Firth", role = "ctb", comment = "support functions for polr"))
## Description: Functions and datasets to support Venables and Ripley,
##         "Modern Applied Statistics with S" (4th edition, 2002).
## Title: Support Functions and Datasets for Venables and Ripley's MASS
## LazyData: yes
## ByteCompile: yes
## License: GPL-2 | GPL-3
## URL: http://www.stats.ox.ac.uk/pub/MASS4/
## Contact: <MASS@stats.ox.ac.uk>
## NeedsCompilation: yes
## Packaged: 2024-06-13 08:23:32 UTC; ripley
## Author: Brian Ripley [aut, cre, cph], Bill Venables [aut, cph], Douglas
##         M. Bates [ctb], Kurt Hornik [trl] (partial port ca 1998),
##         Albrecht Gebhardt [trl] (partial port ca 1998), David Firth
##         [ctb] (support functions for polr)
## Maintainer: Brian Ripley <ripley@stats.ox.ac.uk>
## Repository: CRAN
## Date/Publication: 2024-06-13 10:23:32
## Built: R 4.4.2; aarch64-apple-darwin20; 2024-11-01 00:59:20 UTC; unix
## 
## -- File: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/MASS/Meta/package.rds

packageDescription("survival")

## Title: Survival Analysis
## Priority: recommended
## Package: survival
## Version: 3.7-0
## Date: 2024-06-01
## Depends: R (>= 3.5.0)
## Imports: graphics, Matrix, methods, splines, stats, utils
## LazyData: Yes
## LazyDataCompression: xz
## ByteCompile: Yes
## Authors@R: c(person(c("Terry", "M"), "Therneau",
##         email="therneau.terry@mayo.edu", role=c("aut", "cre")),
##         person("Thomas", "Lumley", role=c("ctb", "trl"),
##         comment="original S->R port and R maintainer until 2009"),
##         person("Atkinson", "Elizabeth", role="ctb"), person("Crowson",
##         "Cynthia", role="ctb"))
## Description: Contains the core survival analysis routines, including
##         definition of Surv objects, Kaplan-Meier and Aalen-Johansen
##         (multi-state) curves, Cox models, and parametric accelerated
##         failure time models.
## License: LGPL (>= 2)
## URL: https://github.com/therneau/survival
## NeedsCompilation: yes
## Packaged: 2024-06-03 15:17:04 UTC; therneau
## Author: Terry M Therneau [aut, cre], Thomas Lumley [ctb, trl] (original
##         S->R port and R maintainer until 2009), Atkinson Elizabeth
##         [ctb], Crowson Cynthia [ctb]
## Maintainer: Terry M Therneau <therneau.terry@mayo.edu>
## Repository: CRAN
## Date/Publication: 2024-06-05 16:30:02 UTC
## Built: R 4.4.2; aarch64-apple-darwin20; 2024-11-01 00:59:54 UTC; unix
## 
## -- File: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/survival/Meta/package.rds

help(package = "MASS")
help(package = "survival")
help.search("Rcmdr")
library(Rcmdr)
packageDescription("Rcmdr")

## Package: Rcmdr
## Version: 2.9-5
## Date: 2024-10-23
## Title: R Commander
## Authors@R: c(person("John", "Fox", role = c("aut", "cre"), email =
##         "jfox@mcmaster.ca"), person("Milan", "Bouchet-Valat", role =
##         "aut"), person("Manuel", "Munoz Marquez", role = "aut"),
##         person("Liviu", "Andronic", role = "ctb"), person("Michael",
##         "Ash", role = "ctb"), person("Theophilius", "Boye", role =
##         "ctb"), person("Stefano", "Calza", role = "ctb"),
##         person("Andy", "Chang", role = "ctb"), person("Vilmantas",
##         "Gegzna", role = "ctb"), person("Philippe", "Grosjean", role =
##         "ctb"), person("Richard", "Heiberger", role = "ctb"),
##         person("Yoshinobu", "Kanda", role = "ctb"), person("Kosar",
##         "Karimi Pour", role = "ctb"), person("G. Jay", "Kerns", role =
##         "ctb"), person("Renaud", "Lancelot", role = "ctb"),
##         person("Matthieu", "Lesnoff", role = "ctb"), person("Uwe",
##         "Ligges", role = "ctb"), person("Samir", "Messad", role =
##         "ctb"), person("Martin", "Maechler", role = "ctb"),
##         person("Robert", "Muenchen", role = "ctb"), person("Duncan",
##         "Murdoch", role = "ctb"), person("Erich", "Neuwirth", role =
##         "ctb"), person("Dan", "Putler", role = "ctb"), person("Brian",
##         "Ripley", role = "ctb"), person("Miroslav", "Ristic", role =
##         "ctb"), person("Peter", "Wolf", role = "ctb"), person("Kevin",
##         "Wright", role="ctb") )
## Depends: R (>= 3.5.0), grDevices, graphics, methods, stats, utils,
##         splines, RcmdrMisc (>= 2.9-1), car (>= 3.1-0), effects (>=
##         4.0-3)
## Imports: tcltk, tcltk2 (>= 1.2-6), abind, relimp (>= 1.0-5), lme4,
##         tools
## Suggests: aplpack, boot, colorspace, e1071, foreign, grid, Hmisc,
##         knitr, lattice, leaps, lmtest, markdown, MASS, mgcv, multcomp
##         (>= 0.991-2), nlme, nnet, nortest, readxl, rgl (>= 0.110.2),
##         rmarkdown (>= 0.9.5), sem (>= 2.1-1)
## ByteCompile: yes
## Description: A platform-independent basic-statistics GUI (graphical
##         user interface) for R, based on the tcltk package.
## License: GPL (>= 2)
## URL: https://github.com/RCmdr-Project/rcmdr, https://www.r-project.org,
##         https://www.john-fox.ca/RCommander/index.html
## NeedsCompilation: no
## Packaged: 2024-10-24 00:46:34 UTC; johnfox
## Author: John Fox [aut, cre], Milan Bouchet-Valat [aut], Manuel Munoz
##         Marquez [aut], Liviu Andronic [ctb], Michael Ash [ctb],
##         Theophilius Boye [ctb], Stefano Calza [ctb], Andy Chang [ctb],
##         Vilmantas Gegzna [ctb], Philippe Grosjean [ctb], Richard
##         Heiberger [ctb], Yoshinobu Kanda [ctb], Kosar Karimi Pour
##         [ctb], G. Jay Kerns [ctb], Renaud Lancelot [ctb], Matthieu
##         Lesnoff [ctb], Uwe Ligges [ctb], Samir Messad [ctb], Martin
##         Maechler [ctb], Robert Muenchen [ctb], Duncan Murdoch [ctb],
##         Erich Neuwirth [ctb], Dan Putler [ctb], Brian Ripley [ctb],
##         Miroslav Ristic [ctb], Peter Wolf [ctb], Kevin Wright [ctb]
## Maintainer: John Fox <jfox@mcmaster.ca>
## Repository: CRAN
## Date/Publication: 2024-10-24 17:10:13 UTC
## Built: R 4.4.1; ; 2024-10-24 18:20:33 UTC; unix
## 
## -- File: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcmdr/Meta/package.rds

#EJERCICIO_2
# Directory where the files created in this exercise are stored. The path is
# kept in a variable and combined with file.path() instead of calling setwd(),
# so the working directory of the session is left untouched.
dir_pec <- "~/Library/Mobile Documents/com~apple~CloudDocs/3. Bioinformatics/UOC_MSc/2025_Software_análisis_datos/PEC_1"
stopifnot(dir.exists(dir_pec))  # fail early if the directory is missing

# Create a small data set
datos <- data.frame(
  Nombre = c("Ana", "Luis", "María"),
  Edad = c(25, 30, 28),
  Peso = c(58, 70, 62),
  Altura = c(165, 180, 170)
)

# Save it as tab-separated text and as CSV
ruta_txt <- file.path(dir_pec, "datos.txt")
ruta_csv <- file.path(dir_pec, "datos.csv")
write.table(datos, ruta_txt, sep = "\t", row.names = FALSE)
write.csv(datos, ruta_csv, row.names = FALSE)

# Read both files back and summarise the numeric columns of the text version
datos_txt <- read.table(ruta_txt, header = TRUE, sep = "\t")
datos_csv <- read.csv(ruta_csv, header = TRUE)
summary(datos_txt[, c("Edad", "Peso", "Altura")])

##       Edad            Peso           Altura     
##  Min.   :25.00   Min.   :58.00   Min.   :165.0  
##  1st Qu.:26.50   1st Qu.:60.00   1st Qu.:167.5  
##  Median :28.00   Median :62.00   Median :170.0  
##  Mean   :27.67   Mean   :63.33   Mean   :171.7  
##  3rd Qu.:29.00   3rd Qu.:66.00   3rd Qu.:175.0  
##  Max.   :30.00   Max.   :70.00   Max.   :180.0

fivenum(datos_csv$Edad)

## [1] 25.0 26.5 28.0 29.0 30.0

fivenum(datos_csv$Peso)

## [1] 58 60 62 66 70

#EJERCICIO_3
library(MASS)
data("anorexia")
head(anorexia)

##   Treat Prewt Postwt
## 1  Cont  80.7   80.2
## 2  Cont  89.4   80.1
## 3  Cont  91.8   86.4
## 4  Cont  74.0   86.3
## 5  Cont  78.1   76.1
## 6  Cont  88.3   78.1

str(anorexia)

## 'data.frame':    72 obs. of  3 variables:
##  $ Treat : Factor w/ 3 levels "CBT","Cont","FT": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Prewt : num  80.7 89.4 91.8 74 78.1 88.3 87.3 75.1 80.6 78.4 ...
##  $ Postwt: num  80.2 80.1 86.4 86.3 76.1 78.1 75.1 86.7 73.5 84.6 ...

anyNA(anorexia)  # Devuelve TRUE si hay algún NA

## [1] FALSE

colSums(is.na(anorexia))

##  Treat  Prewt Postwt 
##      0      0      0

# Check whether any column of anorexia is NULL. vapply() is used instead of
# sapply() so the result is always a logical vector with one value per column.
any(vapply(anorexia, is.null, logical(1)))

## [1] FALSE

# Rename the treatment levels directly on the factor. Assigning a named list
# to levels() maps each old level (the value) to its new label (the name), so
# no round trip through character is needed and the mapping does not depend
# on the order of the existing levels.
levels(anorexia$Treat) <- list(
  "Cogn Beh Tr" = "CBT",
  "Contr" = "Cont",
  "Fam Tr" = "FT"
)
table(anorexia$Treat)

## 
## Cogn Beh Tr       Contr      Fam Tr 
##          29          26          17

head(anorexia)

##   Treat Prewt Postwt
## 1 Contr  80.7   80.2
## 2 Contr  89.4   80.1
## 3 Contr  91.8   86.4
## 4 Contr  74.0   86.3
## 5 Contr  78.1   76.1
## 6 Contr  88.3   78.1

#EJERCICIO_4
library(MASS)
data("biopsy")
# Pass the data frame itself: head("biopsy") only echoed the string "biopsy".
head(biopsy)

##        ID V1 V2 V3 V4 V5 V6 V7 V8 V9     class
## 1 1000025  5  1  1  1  2  1  3  1  1    benign
## 2 1002945  5  4  4  5  7 10  3  2  1    benign
## 3 1015425  3  1  1  1  2  2  3  1  1    benign
## 4 1016277  6  8  8  1  3  4  3  7  1    benign
## 5 1017023  4  1  1  3  2  1  3  1  1    benign
## 6 1017122  8 10 10  8  7 10  9  7  1 malignant

# Directory that receives every file exported in this exercise. It is stated
# once and combined with file.path() instead of repeating the full path.
dir_pec <- "~/Library/Mobile Documents/com~apple~CloudDocs/3. Bioinformatics/UOC_MSc/2025_Software_análisis_datos/PEC_1"

# Export biopsy as CSV
write.csv(biopsy, file = file.path(dir_pec, "biopsy.csv"))
# Load Melanoma and export it both as CSV and as a text table
data("Melanoma")
write.csv(Melanoma, file = file.path(dir_pec, "melanoma.csv"))
write.table(Melanoma, file.path(dir_pec, "melanoma.txt"))

#EJERCICIO_5
library(MASS)
data("birthwt")
head(birthwt)

##    low age lwt race smoke ptl ht ui ftv  bwt
## 85   0  19 182    2     0   0  0  1   0 2523
## 86   0  33 155    3     0   0  0  0   3 2551
## 87   0  20 105    1     1   0  0  0   1 2557
## 88   0  21 108    1     1   0  0  1   2 2594
## 89   0  18 107    1     1   0  0  1   0 2600
## 91   0  21 124    3     0   0  0  0   0 2622

max_age <- max(birthwt$age)
max_age

## [1] 45

min_age <- min(birthwt$age)
min_age

## [1] 14

range_age <- range(birthwt$age)
range_age

## [1] 14 45

indice_minpeso <- which.min(birthwt$bwt)
birthwt[indice_minpeso, ]

##   low age lwt race smoke ptl ht ui ftv bwt
## 4   1  28 120    3     1   1  0  1   0 709

# Position (row index) of the oldest mother in birthwt
which.max(birthwt$age)

## [1] 130

birthwt$bwt[which.max(birthwt$age)] #Consultamos el peso del bebé devolviendo el peso en gramos

## [1] 4990

birthwt$bwt[birthwt$ftv < 2]

##   [1] 2523 2557 2600 2622 2637 2637 2663 2665 2722 2733 2751 2769 2769 2778 2807
##  [16] 2821 2836 2863 2877 2906 2920 2920 2920 2948 2948 2977 2977 2922 3033 3062
##  [31] 3062 3062 3062 3090 3090 3100 3104 3132 3175 3175 3203 3203 3203 3225 3225
##  [46] 3232 3234 3260 3274 3317 3317 3331 3374 3374 3402 3416 3444 3459 3460 3473
##  [61] 3544 3487 3544 3572 3572 3586 3600 3614 3614 3629 3637 3643 3651 3651 3651
##  [76] 3651 3699 3728 3756 3770 3770 3770 3790 3799 3827 3884 3912 3940 3941 3941
##  [91] 3969 3997 3997 4054 4054 4111 4174 4238 4593 4990  709 1135 1330 1474 1588
## [106] 1588 1701 1729 1790 1818 1885 1893 1899 1928 1936 1970 2055 2055 2084 2084
## [121] 2100 2125 2187 2187 2211 2225 2240 2240 2282 2296 2296 2325 2353 2353 2367
## [136] 2381 2381 2381 2410 2410 2410 2424 2442 2466 2466 2495 2495

#EJERCICIO_6
#A partir del conjunto de datos anorexia trabajado en apartados anteriores, cread una matriz que tenga como columnas los valores de Prewt y Postwt, y cada fila sean los valores correspondientes para cada posición.
library(MASS)
data("anorexia")
head(anorexia)

##   Treat Prewt Postwt
## 1  Cont  80.7   80.2
## 2  Cont  89.4   80.1
## 3  Cont  91.8   86.4
## 4  Cont  74.0   86.3
## 5  Cont  78.1   76.1
## 6  Cont  88.3   78.1

# cbind() binds the two weight vectors side by side as the columns of a matrix
matriz_pesos <- cbind(anorexia$Prewt, anorexia$Postwt)
head(matriz_pesos)

##      [,1] [,2]
## [1,] 80.7 80.2
## [2,] 89.4 80.1
## [3,] 91.8 86.4
## [4,] 74.0 86.3
## [5,] 78.1 76.1
## [6,] 88.3 78.1

colnames(matriz_pesos) <- c("Prewt", "Postwt")
head(matriz_pesos)

##      Prewt Postwt
## [1,]  80.7   80.2
## [2,]  89.4   80.1
## [3,]  91.8   86.4
## [4,]  74.0   86.3
## [5,]  78.1   76.1
## [6,]  88.3   78.1

is.matrix(matriz_pesos)

## [1] TRUE

#EJERCICIO_7
# Data set for exercise 7: 25 individuals identified as I1..I25.
Identificador <- paste0("I", 1:25)
Edad <- c(
  23, 24, 21, 22, 23, 25, 26, 24, 21, 22, 23, 25, 26,
  24, 22, 21, 25, 26, 24, 21, 25, 27, 26, 22, 29
)
# 1 = women, 2 = men
Sexo <- c(
  1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2,
  2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2
)
Peso <- c(
  76.5, 81.2, 79.3, 59.5, 67.3, 78.6, 67.9, 100.2, 97.8, 56.4, 65.4, 67.5, 87.4,
  99.7, 87.6, 93.4, 65.4, 73.7, 85.1, 61.2, 54.8, 103.4, 65.8, 71.7, 85.0
)
# Height in cm
Alt <- c(
  165, 154, 178, 165, 164, 175, 182, 165, 178, 165, 158, 183, 184,
  164, 189, 167, 182, 179, 165, 158, 183, 184, 189, 166, 175
)
Fuma <- c(
  "SÍ", "NO", "SÍ", "SÍ", "NO", "NO", "NO", "SÍ", "SÍ", "SÍ", "NO", "NO", "SÍ",
  "SÍ", "SÍ", "SÍ", "NO", "NO", "SÍ", "SÍ", "SÍ", "NO", "SÍ", "NO", "SÍ"
)
Trat_Pulmon <- data.frame(Identificador, Edad, Sexo, Peso, Alt, Fuma)
Trat_Pulmon

##    Identificador Edad Sexo  Peso Alt Fuma
## 1             I1   23    1  76.5 165   SÍ
## 2             I2   24    2  81.2 154   NO
## 3             I3   21    1  79.3 178   SÍ
## 4             I4   22    1  59.5 165   SÍ
## 5             I5   23    1  67.3 164   NO
## 6             I6   25    2  78.6 175   NO
## 7             I7   26    2  67.9 182   NO
## 8             I8   24    2 100.2 165   SÍ
## 9             I9   21    1  97.8 178   SÍ
## 10           I10   22    2  56.4 165   SÍ
## 11           I11   23    1  65.4 158   NO
## 12           I12   25    2  67.5 183   NO
## 13           I13   26    2  87.4 184   SÍ
## 14           I14   24    2  99.7 164   SÍ
## 15           I15   22    1  87.6 189   SÍ
## 16           I16   21    1  93.4 167   SÍ
## 17           I17   25    1  65.4 182   NO
## 18           I18   26    2  73.7 179   NO
## 19           I19   24    2  85.1 165   SÍ
## 20           I20   21    2  61.2 158   SÍ
## 21           I21   25    1  54.8 183   SÍ
## 22           I22   27    2 103.4 184   NO
## 23           I23   26    1  65.8 189   SÍ
## 24           I24   22    1  71.7 166   NO
## 25           I25   29    2  85.0 175   SÍ

Trat_Pulmon[Trat_Pulmon$Edad > 22, ]

##    Identificador Edad Sexo  Peso Alt Fuma
## 1             I1   23    1  76.5 165   SÍ
## 2             I2   24    2  81.2 154   NO
## 5             I5   23    1  67.3 164   NO
## 6             I6   25    2  78.6 175   NO
## 7             I7   26    2  67.9 182   NO
## 8             I8   24    2 100.2 165   SÍ
## 11           I11   23    1  65.4 158   NO
## 12           I12   25    2  67.5 183   NO
## 13           I13   26    2  87.4 184   SÍ
## 14           I14   24    2  99.7 164   SÍ
## 17           I17   25    1  65.4 182   NO
## 18           I18   26    2  73.7 179   NO
## 19           I19   24    2  85.1 165   SÍ
## 21           I21   25    1  54.8 183   SÍ
## 22           I22   27    2 103.4 184   NO
## 23           I23   26    1  65.8 189   SÍ
## 25           I25   29    2  85.0 175   SÍ

Trat_Pulmon[3, 4]

## [1] 79.3

names(Trat_Pulmon)

## [1] "Identificador" "Edad"          "Sexo"          "Peso"         
## [5] "Alt"           "Fuma"

# subset() keeps the rows with Edad below 27 and, through select = -Alt,
# drops the Alt column from the result
subset(Trat_Pulmon, Edad < 27, select = -Alt)

##    Identificador Edad Sexo  Peso Fuma
## 1             I1   23    1  76.5   SÍ
## 2             I2   24    2  81.2   NO
## 3             I3   21    1  79.3   SÍ
## 4             I4   22    1  59.5   SÍ
## 5             I5   23    1  67.3   NO
## 6             I6   25    2  78.6   NO
## 7             I7   26    2  67.9   NO
## 8             I8   24    2 100.2   SÍ
## 9             I9   21    1  97.8   SÍ
## 10           I10   22    2  56.4   SÍ
## 11           I11   23    1  65.4   NO
## 12           I12   25    2  67.5   NO
## 13           I13   26    2  87.4   SÍ
## 14           I14   24    2  99.7   SÍ
## 15           I15   22    1  87.6   SÍ
## 16           I16   21    1  93.4   SÍ
## 17           I17   25    1  65.4   NO
## 18           I18   26    2  73.7   NO
## 19           I19   24    2  85.1   SÍ
## 20           I20   21    2  61.2   SÍ
## 21           I21   25    1  54.8   SÍ
## 23           I23   26    1  65.8   SÍ
## 24           I24   22    1  71.7   NO

#EJERCICIO_8
# 1. Cargar dataset
data("ChickWeight")
head(ChickWeight)

## Grouped Data: weight ~ Time | Chick
##   weight Time Chick Diet
## 1     42    0     1    1
## 2     51    2     1    1
## 3     59    4     1    1
## 4     64    6     1    1
## 5     76    8     1    1
## 6     93   10     1    1

str(ChickWeight)

## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame':   578 obs. of  4 variables:
##  $ weight: num  42 51 59 64 76 93 106 125 149 171 ...
##  $ Time  : num  0 2 4 6 8 10 12 14 16 18 ...
##  $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
##  $ Diet  : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "formula")=Class 'formula'  language weight ~ Time | Chick
##   .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
##  - attr(*, "outer")=Class 'formula'  language ~Diet
##   .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
##  - attr(*, "labels")=List of 2
##   ..$ x: chr "Time"
##   ..$ y: chr "Body weight"
##  - attr(*, "units")=List of 2
##   ..$ x: chr "(days)"
##   ..$ y: chr "(gm)"

summary(ChickWeight)

##      weight           Time           Chick     Diet   
##  Min.   : 35.0   Min.   : 0.00   13     : 12   1:220  
##  1st Qu.: 63.0   1st Qu.: 4.00   9      : 12   2:120  
##  Median :103.0   Median :10.00   20     : 12   3:120  
##  Mean   :121.8   Mean   :10.72   10     : 12   4:118  
##  3rd Qu.:163.8   3rd Qu.:16.00   17     : 12          
##  Max.   :373.0   Max.   :21.00   19     : 12          
##                                  (Other):506

# 2. Scatter plot of weight
# plot() on a single vector draws each value against its position in the
# vector, so the x axis is the observation (row) index of ChickWeight, not a
# chick identifier; the axis label states this.
plot(ChickWeight$weight,
     main = "Gráfico de dispersión del peso de los pollitos",
     xlab = "Índice de la observación",
     ylab = "Peso (g)",
     pch = 19, col = "blue")

# 3. Diagrama de caja de Time
boxplot(ChickWeight$Time,
        main = "Diagrama de caja de Time",
        ylab = "Tiempo (días)",
        col = "lightgreen")

#EJERCICIO_9
library(MASS)
# Reload anorexia so that Treat holds its original level names again
# (the filter on "Cont" below relies on them).
data("anorexia")

# 1. Weight change (after minus before), stored next to the treatment
peso_ganado <- anorexia$Postwt - anorexia$Prewt
anorexia_treat_df <- data.frame(
  Treat = anorexia$Treat,
  PesoGanado = peso_ganado
)

# 2. Keep only the individuals who gained weight
anorexia_ganaron <- anorexia_treat_df[anorexia_treat_df$PesoGanado > 0, ]

# 3. Of those, keep only the ones whose treatment is "Cont"
anorexia_treat_C_df <- anorexia_ganaron[anorexia_ganaron$Treat == "Cont", ]

# 4. Mostrar resultados
head(anorexia_treat_df)

##   Treat PesoGanado
## 1  Cont       -0.5
## 2  Cont       -9.3
## 3  Cont       -5.4
## 4  Cont       12.3
## 5  Cont       -2.0
## 6  Cont      -10.2

head(anorexia_ganaron)

##    Treat PesoGanado
## 4   Cont       12.3
## 8   Cont       11.6
## 10  Cont        6.2
## 13  Cont        8.3
## 14  Cont        3.3
## 15  Cont       11.3

anorexia_treat_C_df

##    Treat PesoGanado
## 4   Cont       12.3
## 8   Cont       11.6
## 10  Cont        6.2
## 13  Cont        8.3
## 14  Cont        3.3
## 15  Cont       11.3
## 21  Cont        2.8
## 22  Cont        0.3
## 23  Cont        1.8
## 24  Cont        3.7
## 25  Cont       15.9

#EJERCICIO_10
library(knitr)

#Caso_Práctico_LAB_1
set.seed(999)  # fixed seed so the simulated data set is reproducible

# a) Simulate a data set of 30 individuals. The random draws are made in the
#    order Edad, Trat, Peso, Alt; changing that order changes the values.
Id <- paste0("P", seq_len(30))
Edad <- sample(20:50, size = 30, replace = TRUE)
Gene <- rep(c(1, 2), each = 15)  # first 15 rows coded 1, last 15 coded 2
Trat <- factor(sample(c("A", "B", "C"), size = 30, replace = TRUE))
Peso <- round(rnorm(30, mean = 70, sd = 10), digits = 1)
Alt <- round(rnorm(30, mean = 170, sd = 10), digits = 1)

Datos <- data.frame(Id, Edad, Gene, Trat, Peso, Alt)

# b) Información del dataset
str(Datos)

## 'data.frame':    30 obs. of  6 variables:
##  $ Id  : chr  "P1" "P2" "P3" "P4" ...
##  $ Edad: int  46 23 48 26 28 33 20 45 29 41 ...
##  $ Gene: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Trat: Factor w/ 3 levels "A","B","C": 1 1 2 2 1 3 3 3 1 3 ...
##  $ Peso: num  67.5 48.9 66.3 75.2 75.2 56 65.1 70.1 57.2 58.9 ...
##  $ Alt : num  170 158 156 167 166 ...

summary(Datos)

##       Id                 Edad            Gene     Trat        Peso      
##  Length:30          Min.   :20.00   Min.   :1.0   A:12   Min.   :48.90  
##  Class :character   1st Qu.:26.00   1st Qu.:1.0   B:10   1st Qu.:63.77  
##  Mode  :character   Median :33.50   Median :1.5   C: 8   Median :69.90  
##                     Mean   :34.23   Mean   :1.5          Mean   :68.74  
##                     3rd Qu.:42.00   3rd Qu.:2.0          3rd Qu.:75.00  
##                     Max.   :50.00   Max.   :2.0          Max.   :87.40  
##       Alt       
##  Min.   :149.4  
##  1st Qu.:165.8  
##  Median :169.6  
##  Mean   :170.0  
##  3rd Qu.:178.5  
##  Max.   :186.6

head(Datos)

##   Id Edad Gene Trat Peso   Alt
## 1 P1   46    1    A 67.5 169.8
## 2 P2   23    1    A 48.9 158.5
## 3 P3   48    1    B 66.3 155.9
## 4 P4   26    1    B 75.2 167.2
## 5 P5   28    1    A 75.2 165.8
## 6 P6   33    1    C 56.0 180.0

# c) New variable IMC: Peso divided by the square of Alt / 100, rounded to one
#    decimal (Alt is presumably in cm, so Alt / 100 is the height in metres).
Datos$IMC <- with(Datos, round(Peso / (Alt / 100)^2, digits = 1))
head(Datos)

##   Id Edad Gene Trat Peso   Alt  IMC
## 1 P1   46    1    A 67.5 169.8 23.4
## 2 P2   23    1    A 48.9 158.5 19.5
## 3 P3   48    1    B 66.3 155.9 27.3
## 4 P4   26    1    B 75.2 167.2 26.9
## 5 P5   28    1    A 75.2 165.8 27.4
## 6 P6   33    1    C 56.0 180.0 17.3

# d) Split by the Gene code: rows coded 1 go to Df_Mujeres, rows coded 2 to
#    Df_Hombres
Df_Mujeres <- Datos[Datos$Gene == 1, ]
Df_Hombres <- Datos[Datos$Gene == 2, ]

# e) Stack the two parts back into a single data frame
Datos_combinado <- rbind(Df_Mujeres, Df_Hombres)
head(Datos_combinado)

##   Id Edad Gene Trat Peso   Alt  IMC
## 1 P1   46    1    A 67.5 169.8 23.4
## 2 P2   23    1    A 48.9 158.5 19.5
## 3 P3   48    1    B 66.3 155.9 27.3
## 4 P4   26    1    B 75.2 167.2 26.9
## 5 P5   28    1    A 75.2 165.8 27.4
## 6 P6   33    1    C 56.0 180.0 17.3

PEC_1_Software

Mónica Doblas-Bajo

2025-10-13

R Markdown

Including Plots