Ejercicio 1
# Show the R version, platform, locale and the packages attached or loaded in
# this session. Note: sessionInfo() does not list every installed package;
# installed.packages() would be the call for that.
sessionInfo()
## R version 4.3.3 (2024-02-29 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_Europe.utf8 LC_CTYPE=English_Europe.utf8
## [3] LC_MONETARY=English_Europe.utf8 LC_NUMERIC=C
## [5] LC_TIME=English_Europe.utf8
##
## time zone: Europe/Madrid
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.34 R6_2.5.1 fastmap_1.1.1 xfun_0.42
## [5] cachem_1.0.8 knitr_1.45 htmltools_0.5.7 rmarkdown_2.25
## [9] lifecycle_1.0.4 cli_3.6.2 sass_0.4.8 jquerylib_0.1.4
## [13] compiler_4.3.3 rstudioapi_0.15.0 tools_4.3.3 evaluate_0.23
## [17] bslib_0.6.1 yaml_2.3.8 rlang_1.1.3 jsonlite_1.8.8
# Install MASS and survival only when they are not already available, so that
# re-running the script does not reinstall them (or need network access) on
# every run. The CRAN mirror is reached over HTTPS.
packages_to_load <- c("MASS", "survival")
is_installed <- vapply(
  packages_to_load, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(
    packages_to_load[!is_installed],
    repos = "https://cloud.r-project.org"
  )
}
# Attach the packages; invisible() keeps the list returned by lapply() out of
# the console.
invisible(lapply(packages_to_load, library, character.only = TRUE))
# List of packages currently attached
(.packages())
## [1] "survival" "MASS" "stats" "graphics" "grDevices" "utils"
## [7] "datasets" "methods" "base"
# Search the installed help pages for "Rcmdr"
??Rcmdr
## starting httpd help server ... done
Ejercicio 2
# Import the text file (sep = "" makes any run of whitespace a field
# separator) and summarise three of its variables
data <- read.delim("ejemplo_datos_LAB1.txt", sep="")
summary(data$salario_mensual)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1124 1242 1387 1379 1499 1659
summary(data$edad)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 39.00 51.00 51.00 51.71 54.50 61.00
# position is a character column, so summary() only reports its length, class
# and mode instead of any statistics
summary(data$position)
## Length Class Mode
## 7 character character
# Import a .csv file and compute Tukey's five-number summary (minimum, lower
# hinge, median, upper hinge, maximum) of two variables
data_csv <- read.csv("ejemplo_csv_LAB1.csv")
fivenum(data_csv$salario_mensual)
## [1] 1124 1242 1387 1499 1659
fivenum(data_csv$edad)
## [1] 39.0 51.0 51.0 54.5 61.0
Ejercicio 3
# Take a working copy of the anorexia data set as a data frame and report the
# class of each of its columns
anorexia <- as.data.frame(anorexia)
vapply(anorexia, class, character(1))
## Treat Prewt Postwt
## "factor" "numeric" "numeric"
# Count the missing values in each column. is.na() is the element-wise test;
# is.null() is not used because it returns a single TRUE/FALSE for the whole
# object (never one value per element), so it cannot flag individual entries.
sum(is.na(anorexia$Treat))
## [1] 0
sum(is.na(anorexia$Prewt))
## [1] 0
sum(is.na(anorexia$Postwt))
## [1] 0
# Rename the treatment labels "CBT", "Cont" and "FT" to "Cogn Beh Tr", "Contr"
# and "Fam Tr". Only the levels of the Treat factor are touched, instead of
# comparing every cell of the data frame (numeric columns included) with a
# string and converting the factor to character and back.
treat_labels <- c(CBT = "Cogn Beh Tr", Cont = "Contr", FT = "Fam Tr")
anorexia$Treat <- as.factor(anorexia$Treat)
treat_levels <- levels(anorexia$Treat)
to_rename <- treat_levels %in% names(treat_labels)
# Levels that are not in the lookup (for example when this is run a second
# time) are left unchanged.
levels(anorexia$Treat)[to_rename] <-
  unname(treat_labels[treat_levels[to_rename]])
summary(anorexia)
## Treat Prewt Postwt
## Cogn Beh Tr:29 Min. :70.00 Min. : 71.30
## Contr :26 1st Qu.:79.60 1st Qu.: 79.33
## Fam Tr :17 Median :82.30 Median : 84.05
## Mean :82.41 Mean : 85.17
## 3rd Qu.:86.00 3rd Qu.: 91.55
## Max. :94.90 Max. :103.60
Ejercicio 4
# Export the "biopsy" data to a CSV file.
# biopsy and Melanoma are not defined in this script; presumably they are the
# data sets provided by the MASS package attached earlier - TODO confirm.
write.csv(biopsy, file = "biopsy.csv")
# Export the "Melanoma" data in three different formats: CSV, text table and
# Excel workbook (write.xlsx() is provided by openxlsx)
library(openxlsx)
write.csv(Melanoma, file = "melanoma.csv")
write.table(Melanoma, file = "melanoma.txt")
write.xlsx(Melanoma, file = "melanoma.xlsx")

# Summarise the "age" variable and write the summary to a file named
# melanoma.doc.
# NOTE(review): write.table() produces plain delimited text; the .doc extension
# alone does not make this a Word-format document - confirm this is acceptable.
melanoma_age <- as.array(summary(Melanoma$age))
melanoma_age
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.00 42.00 54.00 52.46 65.00 95.00
write.table(melanoma_age, file = "melanoma.doc")
# Import a data set from a .csv file.
# NOTE(review): this is an absolute, machine-specific path; the call fails on
# any computer where this drive and folder do not exist.
beans <- read.csv("P:/AntonioAdri/WinDocuments/Adri/DryBeanDataset/Dry_Bean_Dataset.csv")
Ejercicio 5
# Maximum and minimum age of the mothers in birthwt
library(MASS)
birth_data <- as.data.frame(birthwt)
max(birth_data$age)
## [1] 45
min(birth_data$age)
## [1] 14
# Age range of the mothers (maximum minus minimum)
diff(range(birth_data$age))
## [1] 31
# Check whether the mother of the lightest newborn smoked
# (smoke is coded 1 = smoker, 0 = non-smoker in the MASS documentation)
birth_data$smoke[birth_data$bwt == min(birth_data$bwt)]
## [1] 1
# Birth weights of the babies whose mothers saw a physician at least twice.
# "At least two visits" is ftv >= 2; filtering on ftv == 3 would keep only the
# mothers with exactly three visits and drop those with 2, 4 or more.
subset(birth_data$bwt, birth_data$ftv >= 2)
## (42 weights are returned; console output not reproduced here)
Ejercicio 6
# Bind Prewt and Postwt side by side as the two (unnamed) columns of a matrix
matrix_anorexia <- cbind(anorexia$Prewt, anorexia$Postwt, deparse.level = 0)
head(matrix_anorexia)
## [,1] [,2]
## [1,] 80.7 80.2
## [2,] 89.4 80.1
## [3,] 91.8 86.4
## [4,] 74.0 86.3
## [5,] 78.1 76.1
## [6,] 88.3 78.1
Ejercicio 7
# Build the data set: 25 records, one vector per variable
Identificador <- paste0("I", 1:25)
Edad <- c(
  23, 24, 21, 22, 23, 25, 26, 24, 21, 22, 23, 25, 26,
  24, 22, 21, 25, 26, 24, 21, 25, 27, 26, 22, 29
)
# Sexo: 1 for women and 2 for men
Sexo <- c(
  1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2,
  2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2
)
Peso <- c(
  76.5, 81.2, 79.3, 59.5, 67.3, 78.6, 67.9, 100.2, 97.8, 56.4, 65.4, 67.5, 87.4,
  99.7, 87.6, 93.4, 65.4, 73.7, 85.1, 61.2, 54.8, 103.4, 65.8, 71.7, 85.0
)
# Alt: height in cm
Alt <- c(
  165, 154, 178, 165, 164, 175, 182, 165, 178, 165, 158, 183, 184,
  164, 189, 167, 182, 179, 165, 158, 183, 184, 189, 166, 175
)
Fuma <- c(
  "SÍ", "NO", "SÍ", "SÍ", "NO", "NO", "NO", "SÍ", "SÍ", "SÍ", "NO", "NO", "SÍ",
  "SÍ", "SÍ", "SÍ", "NO", "NO", "SÍ", "SÍ", "SÍ", "NO", "SÍ", "NO", "SÍ"
)
Trat_Pulmón <- data.frame(Identificador, Edad, Sexo, Peso, Alt, Fuma)
head(Trat_Pulmón)
# Records with age > 22
# NOTE(review): the earlier wording of this comment said "age < 22" while the
# filter keeps Edad > 22; confirm which one the exercise asks for.
subset1 <- Trat_Pulmón[Trat_Pulmón$Edad > 22, , drop = FALSE]
head(subset1)
# Record 3 of column 4 (row 3, Peso)
Trat_Pulmón[3, 4]
## [1] 79.3
# Records with age < 27, leaving out the Alt column
subset3 <- Trat_Pulmón[
  Trat_Pulmón$Edad < 27,
  setdiff(names(Trat_Pulmón), "Alt"),
  drop = FALSE
]
head(subset3)
Ejercicio 8
# Load the ChickWeight data set as a data frame
chick_wt <- as.data.frame(ChickWeight)
# Scatter plot of the weight variable; given a single vector, plot() draws the
# values against their position (index) in the vector
plot(chick_wt$weight)

# Box plot of the Time variable, with "Time" as the x-axis label
boxplot(chick_wt$Time, xlab = "Time")

Ejercicio 9
# Build a new data frame from anorexia: the treatment and the weight change
# (Postwt minus Prewt). Treat is read from MASS::anorexia, i.e. with the
# labels as shipped in the package.
delta_prewt_postwt <- anorexia$Postwt - anorexia$Prewt
anorexia_treat_df <- data.frame(
  Treat = MASS::anorexia$Treat,
  Difference_Prewt_Postwt = delta_prewt_postwt
)
head(anorexia_treat_df)
# Keep the rows with treatment "Cont" whose weight increased
anorexia_treat_C_df <- anorexia_treat_df[
  anorexia_treat_df$Difference_Prewt_Postwt > 0 &
    anorexia_treat_df$Treat == "Cont", ,
  drop = FALSE
]
head(anorexia_treat_C_df)
Caso práctico
# Build the data set with the variables from the table.
# set.seed() fixes the random number stream so the draws are reproducible.
set.seed(30)
Identificador <- as.character(seq_len(30))
Edad <- sample(18:99, 30, replace = TRUE)
Genero <- sample(1:2, 30, replace = TRUE)
Tratamiento <- as.factor(sample(c("A", "B", "C"), 30, replace = TRUE))
Peso <- sample(40:90, 30, replace = TRUE)
Estatura <- sample(150:190, 30, replace = TRUE)
datos_practica <- data.frame(
  Id = Identificador,
  Edad = Edad,
  Gene = Genero,
  Trat = Tratamiento,
  Peso = Peso,
  Alt = Estatura
)
summary(datos_practica)
## Id Edad Gene Trat Peso
## Length:30 Min. :18.00 Min. :1.0 A:10 Min. :40.00
## Class :character 1st Qu.:47.00 1st Qu.:1.0 B: 9 1st Qu.:54.25
## Mode :character Median :66.00 Median :1.5 C:11 Median :63.50
## Mean :63.23 Mean :1.5 Mean :65.90
## 3rd Qu.:81.75 3rd Qu.:2.0 3rd Qu.:77.75
## Max. :99.00 Max. :2.0 Max. :90.00
## Alt
## Min. :150.0
## 1st Qu.:156.0
## Median :164.0
## Mean :165.5
## 3rd Qu.:172.0
## Max. :190.0
# Create the IMC (body mass index) variable and add it to the data set.
# BMI is weight in kg divided by the square of the height in metres, so the
# height is converted first (assumes Peso is in kg and Alt in cm, as the
# sampled ranges 40-90 and 150-190 suggest). Dividing Peso by Alt directly
# does not give a BMI.
IMC <- datos_practica$Peso / (datos_practica$Alt / 100)^2
datos_practica$IMC <- round(IMC, 2)
head(datos_practica)
# Create two subsets split by the gender code (Gene 2 -> Df_Hombres,
# Gene 1 -> Df_Mujeres)
Df_Hombres <- datos_practica[datos_practica$Gene == 2, , drop = FALSE]
head(Df_Hombres)
Df_Mujeres <- datos_practica[datos_practica$Gene == 1, , drop = FALSE]
head(Df_Mujeres)
# Stack the two subsets back together, turn Id into an integer so it sorts
# numerically, and show the first rows ordered by Id
datos_practica_2 <- rbind(Df_Hombres, Df_Mujeres)
datos_practica_2[["Id"]] <- as.integer(datos_practica_2[["Id"]])
id_order <- order(datos_practica_2[["Id"]])
head(datos_practica_2[id_order, ])