Ejercicio 1

# Show the R version, platform, locale and the packages attached or loaded in this session
sessionInfo()
## R version 4.3.3 (2024-02-29 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_Europe.utf8  LC_CTYPE=English_Europe.utf8   
## [3] LC_MONETARY=English_Europe.utf8 LC_NUMERIC=C                   
## [5] LC_TIME=English_Europe.utf8    
## 
## time zone: Europe/Madrid
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.34     R6_2.5.1          fastmap_1.1.1     xfun_0.42        
##  [5] cachem_1.0.8      knitr_1.45        htmltools_0.5.7   rmarkdown_2.25   
##  [9] lifecycle_1.0.4   cli_3.6.2         sass_0.4.8        jquerylib_0.1.4  
## [13] compiler_4.3.3    rstudioapi_0.15.0 tools_4.3.3       evaluate_0.23    
## [17] bslib_0.6.1       yaml_2.3.8        rlang_1.1.3       jsonlite_1.8.8
# Install the MASS and survival packages.
# The HTTPS CRAN endpoint is used so packages are not fetched over plain HTTP.
install.packages(c("MASS", "survival"), repos = "https://cloud.r-project.org")
## Installing packages into 'C:/Users/adrii/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
# Attach every package named in packages_to_load
packages_to_load <- c("MASS", "survival")
lapply(
  packages_to_load,
  function(pkg_name) library(pkg_name, character.only = TRUE)
)
# Names of the packages that are currently attached
print(.packages())
## [1] "survival"  "MASS"      "stats"     "graphics"  "grDevices" "utils"    
## [7] "datasets"  "methods"   "base"
# Search the installed help pages for topics matching "Rcmdr"
help.search("Rcmdr")
## starting httpd help server ... done

Ejercicio 2

# Import a text file (sep = "" splits fields on whitespace) and summarise three of its variables
data <- read.delim("ejemplo_datos_LAB1.txt", sep="")
summary(data$salario_mensual)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1124    1242    1387    1379    1499    1659
summary(data$edad)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   39.00   51.00   51.00   51.71   54.50   61.00
# `position` is a character column, so summary() reports only length, class and mode
summary(data$position)
##    Length     Class      Mode 
##         7 character character
# Import a .csv file and compute the five-number summary (fivenum) of two variables
data_csv <- read.csv("ejemplo_csv_LAB1.csv")
fivenum(data_csv$salario_mensual)
## [1] 1124 1242 1387 1499 1659
fivenum(data_csv$edad)
## [1] 39.0 51.0 51.0 54.5 61.0

Ejercicio 3

# Load the anorexia data set and show the class of each column.
# vapply() pins the result to one class string per column.
anorexia <- as.data.frame(anorexia)
vapply(anorexia, class, character(1))
##     Treat     Prewt    Postwt 
##  "factor" "numeric" "numeric"
# Count the missing values (NA) in each column.
# The former `| is.null(column)` term was dropped: is.null() tests the whole
# column object rather than its elements, and for a missing column the sum was
# taken over a zero-length vector, so it could never report a problem.
sum(is.na(anorexia$Treat))
## [1] 0
sum(is.na(anorexia$Prewt))
## [1] 0
sum(is.na(anorexia$Postwt))
## [1] 0
# Relabel the treatment levels "CBT", "Cont" and "FT" as "Cogn Beh Tr",
# "Contr" and "Fam Tr".
# The factor levels are renamed directly instead of comparing the whole data
# frame (numeric columns included) against a string and converting the column
# factor -> character -> factor.
treat_labels <- c(CBT = "Cogn Beh Tr", Cont = "Contr", FT = "Fam Tr")
current_levels <- levels(anorexia$Treat)
to_rename <- current_levels %in% names(treat_labels)
# Only levels that still carry an old label are touched, so re-running this
# step leaves already-relabelled data unchanged.
levels(anorexia$Treat)[to_rename] <-
  unname(treat_labels[current_levels[to_rename]])

summary(anorexia)
##          Treat        Prewt           Postwt      
##  Cogn Beh Tr:29   Min.   :70.00   Min.   : 71.30  
##  Contr      :26   1st Qu.:79.60   1st Qu.: 79.33  
##  Fam Tr     :17   Median :82.30   Median : 84.05  
##                   Mean   :82.41   Mean   : 85.17  
##                   3rd Qu.:86.00   3rd Qu.: 91.55  
##                   Max.   :94.90   Max.   :103.60

Ejercicio 4

# Export the "biopsy" data set to a CSV file
write.csv(x = biopsy, file = "biopsy.csv")

# Export the "Melanoma" data set in three formats: CSV, plain text and Excel
library(openxlsx)

write.csv(x = Melanoma, file = "melanoma.csv")
write.table(x = Melanoma, file = "melanoma.txt")
write.xlsx(x = Melanoma, file = "melanoma.xlsx")

# Summarise the "age" variable and write the summary to a file named *.doc
# NOTE(review): write.table() emits plain text; only the file extension says
# "Word" here.
melanoma_age <- as.array(summary(Melanoma[["age"]]))
melanoma_age
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4.00   42.00   54.00   52.46   65.00   95.00
write.table(x = melanoma_age, file = "melanoma.doc")

# Import a data set from a .csv file
beans <- read.csv(file = "P:/AntonioAdri/WinDocuments/Adri/DryBeanDataset/Dry_Bean_Dataset.csv")

Ejercicio 5

# Oldest and youngest mother in the birthwt data set
library(MASS)
birth_data <- as.data.frame(birthwt)
max(birth_data$age)
## [1] 45
min(birth_data$age)
## [1] 14
# Age range of the mothers
max(birth_data$age) - min(birth_data$age)
## [1] 31
# Smoking status of the mother whose newborn had the lowest birth weight
birth_data$smoke[birth_data$bwt == min(birth_data$bwt)]
## [1] 1
# Birth weights of the newborns whose mothers visited the physician at least
# twice. "At least two visits" is ftv >= 2; the earlier filter `ftv == 3`
# kept only the mothers with exactly three visits.
subset(birth_data$bwt, birth_data$ftv >= 2)
## (recorded output removed: it listed the 7 weights for ftv == 3; re-run to regenerate)

Ejercicio 6

# Build a two-column matrix: Prewt in column 1, Postwt in column 2.
# deparse.level = 0 keeps the columns unnamed.
matrix_anorexia <- cbind(anorexia$Prewt, anorexia$Postwt, deparse.level = 0)
head(matrix_anorexia)
##      [,1] [,2]
## [1,] 80.7 80.2
## [2,] 89.4 80.1
## [3,] 91.8 86.4
## [4,] 74.0 86.3
## [5,] 78.1 76.1
## [6,] 88.3 78.1

Ejercicio 7

# Build the data set: 25 patients with identifier, age, sex, weight, height
# and smoking status
Identificador <- paste0("I", 1:25)
Edad <- c(
  23, 24, 21, 22, 23, 25, 26, 24, 21, 22,
  23, 25, 26, 24, 22, 21, 25, 26, 24, 21,
  25, 27, 26, 22, 29
)
# 1 for women, 2 for men
Sexo <- c(
  1, 2, 1, 1, 1, 2, 2, 2, 1, 2,
  1, 2, 2, 2, 1, 1, 1, 2, 2, 2,
  1, 2, 1, 1, 2
)

Peso <- c(
  76.5, 81.2, 79.3, 59.5, 67.3, 78.6, 67.9, 100.2, 97.8, 56.4,
  65.4, 67.5, 87.4, 99.7, 87.6, 93.4, 65.4, 73.7, 85.1, 61.2,
  54.8, 103.4, 65.8, 71.7, 85.0
)

# Height in cm
Alt <- c(
  165, 154, 178, 165, 164, 175, 182, 165, 178, 165,
  158, 183, 184, 164, 189, 167, 182, 179, 165, 158,
  183, 184, 189, 166, 175
)

Fuma <- c(
  "SÍ", "NO", "SÍ", "SÍ", "NO", "NO", "NO", "SÍ", "SÍ", "SÍ",
  "NO", "NO", "SÍ", "SÍ", "SÍ", "SÍ", "NO", "NO", "SÍ", "SÍ",
  "SÍ", "NO", "SÍ", "NO", "SÍ"
)
Trat_Pulmón <- data.frame(Identificador, Edad, Sexo, Peso, Alt, Fuma)
head(Trat_Pulmón)
# Records with age < 22 (the filter previously used `Edad > 22`, the opposite
# of what is asked for)
subset1 <- subset(Trat_Pulmón, Edad < 22)
head(subset1)
# Row 3 of column 4
Trat_Pulmón[3, 4]
## [1] 79.3
# Records with age < 27, leaving out the Alt column
subset3 <- subset(Trat_Pulmón, Edad < 27, select = -Alt)
head(subset3)

Ejercicio 8

# Load the ChickWeight data set as a plain data frame
chick_wt <- as.data.frame(ChickWeight)

# Scatter plot of the weight variable (values are drawn against their index)
plot(chick_wt$weight)

# Box plot of the Time variable, labelled "Time" on the x axis
boxplot(chick_wt$Time, xlab = "Time")

Ejercicio 9

# Build a new data frame from anorexia: the treatment group plus the weight
# change (Postwt minus Prewt)
delta_prewt_postwt <- as.vector(with(anorexia, Postwt - Prewt))

anorexia_treat_df <- data.frame(
  Treat = MASS::anorexia$Treat,
  Difference_Prewt_Postwt = delta_prewt_postwt
)
head(anorexia_treat_df)
# Keep the rows with treatment "Cont" whose weight increased
gained_in_control <- with(
  anorexia_treat_df,
  Treat == "Cont" & Difference_Prewt_Postwt > 0
)
# which() drops NA conditions, matching how subset() filters rows
anorexia_treat_C_df <- anorexia_treat_df[which(gained_in_control), , drop = FALSE]
head(anorexia_treat_C_df)

Ejercicio 10

https://rpubs.com/Adrianagp20/1159431

Caso práctico

# Build a simulated data set with the variables from the table.
# The seed and the order of the sample() calls are kept as they were, so the
# generated values are unchanged.
set.seed(30)
Identificador <- as.character(1:30)
Edad <- sample(18:99, 30, replace = TRUE)
Genero <- sample(1:2, 30, replace = TRUE)
Tratamiento <- as.factor(sample(c("A", "B", "C"), 30, replace = TRUE))
Peso <- sample(40:90, 30, replace = TRUE)
Estatura <- sample(150:190, 30, replace = TRUE)

datos_practica <- data.frame(
  Id = Identificador,
  Edad = Edad,
  Gene = Genero,
  Trat = Tratamiento,
  Peso = Peso,
  Alt = Estatura
)

summary(datos_practica)
##       Id                 Edad            Gene     Trat        Peso      
##  Length:30          Min.   :18.00   Min.   :1.0   A:10   Min.   :40.00  
##  Class :character   1st Qu.:47.00   1st Qu.:1.0   B: 9   1st Qu.:54.25  
##  Mode  :character   Median :66.00   Median :1.5   C:11   Median :63.50  
##                     Mean   :63.23   Mean   :1.5          Mean   :65.90  
##                     3rd Qu.:81.75   3rd Qu.:2.0          3rd Qu.:77.75  
##                     Max.   :99.00   Max.   :2.0          Max.   :90.00  
##       Alt       
##  Min.   :150.0  
##  1st Qu.:156.0  
##  Median :164.0  
##  Mean   :165.5  
##  3rd Qu.:172.0  
##  Max.   :190.0
# Create the IMC (body-mass index) variable and add it to the data set.
# IMC is weight in kg divided by the square of height in metres; Alt is drawn
# from 150:190 (cm), so it is converted to metres first. The earlier
# Peso / Alt was a plain kg-per-cm ratio.
IMC <- datos_practica$Peso / (datos_practica$Alt / 100)^2
datos_practica$IMC <- round(IMC, 2)
head(datos_practica)
# Split into two subsets by gender (Gene == 2 for men, Gene == 1 for women)
Df_Hombres <- subset(datos_practica, Gene == 2)
head(Df_Hombres)
Df_Mujeres <- subset(datos_practica, Gene == 1)
head(Df_Mujeres)
# Combine the subsets again and show the first rows ordered by numeric Id
datos_practica_2 <- rbind(Df_Hombres, Df_Mujeres)
datos_practica_2$Id <- as.integer(datos_practica_2$Id)
head(datos_practica_2[order(datos_practica_2$Id), ])