Ejercicio 1.-

Utilizando las funciones citadas en este Laboratorio, comprobad qué paquetes tenéis instalados en vuestra versión de RStudio e instalad el paquete MASS y el paquete Survival y comprobad la información que contienen. Buscad información sobre el paquete Rcmdr (R Commander) desde la consola.

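Para ver la versión de R y los paquetes cargados en la sesión se usa sessionInfo(); la lista completa de paquetes instalados se obtiene con installed.packages() o library():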

# List every installed package (name and version) found in the library paths.
installed.packages()[, c("Package", "Version")]
# Report the R version and the packages loaded in the current session only.
sessionInfo()
## R version 4.5.2 (2025-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=Spanish_Spain.utf8  LC_CTYPE=Spanish_Spain.utf8   
## [3] LC_MONETARY=Spanish_Spain.utf8 LC_NUMERIC=C                  
## [5] LC_TIME=Spanish_Spain.utf8    
## 
## time zone: Europe/Madrid
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.39   R6_2.6.1        fastmap_1.2.0   xfun_0.56      
##  [5] cachem_1.1.0    knitr_1.51      htmltools_0.5.9 rmarkdown_2.30 
##  [9] lifecycle_1.0.5 cli_3.6.5       sass_0.4.10     jquerylib_0.1.4
## [13] compiler_4.5.2  tools_4.5.2     evaluate_1.0.5  bslib_0.10.0   
## [17] yaml_2.3.12     rlang_1.1.7     jsonlite_2.0.0

Para la instalación se puede hacer:

# Installation calls, kept commented out so they are not executed.
#install.packages("MASS")
#install.packages("survival")

Y para la información sobre Rcmdr:

# Search the installed help pages for "Rcmdr".
help.search("Rcmdr")
## starting httpd help server ... done

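Nos dice que sirve para invocar comandos de las herramientas de CMD desde R. Nota: esa descripción parece corresponder a la función Rcmd del paquete tools (interfaz de R CMD) y no al paquete Rcmdr (R Commander), que es una interfaz gráfica de usuario para R.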

Ejercicio 2.-

a)

Importad un archivo de texto y buscad un summary() de tres variables que escojáis.

# Read the comma-separated text file and preview its first rows.
iris_data <- read.csv(file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\Iris.txt")
head(iris_data, n = 6L)
# Summarise the three chosen columns.
summary(iris_data[c("SepalLengthCm", "PetalLengthCm", "Species")])
##  SepalLengthCm   PetalLengthCm     Species         
##  Min.   :4.300   Min.   :1.000   Length:150        
##  1st Qu.:5.100   1st Qu.:1.600   Class :character  
##  Median :5.800   Median :4.350   Mode  :character  
##  Mean   :5.843   Mean   :3.759                     
##  3rd Qu.:6.400   3rd Qu.:5.100                     
##  Max.   :7.900   Max.   :6.900

b)

Importad un archivo «.csv» y buscad un fivenum() de dos variables que os parezcan relevantes para el estudio.

# Load the CSV file and preview its first rows.
mtcars_data <- read.csv(file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\mtcars.csv")
head(mtcars_data, n = 6L)
# Five-number summary of mpg, then of hp.
fivenum(mtcars_data[["mpg"]])
## [1] 10.40 15.35 19.20 22.80 33.90
fivenum(mtcars_data[["hp"]])
## [1]  52  96 123 180 335

Ejercicio 3.-

# Attach MASS and load its anorexia dataset into the workspace.
library(MASS)
data("anorexia", package = "MASS")
head(anorexia, n = 6L)

Los tipos de datos de las variables son:

# Class of each column, returned as a named character vector.
vapply(anorexia, class, character(1))
##     Treat     Prewt    Postwt 
##  "factor" "numeric" "numeric"

Para saber si hay NAs o NULLs:

# Is there any missing value anywhere in the data frame?
anyNA(anorexia)
## [1] FALSE
# is.null() yields a single logical for the whole object.
is.null(anorexia)
## [1] FALSE
# Rename the treatment levels (new label = old level).
levels(anorexia$Treat) <- list("Cogn Beh Tr" = "CBT",
                               "Contr" = "Cont",
                               "Fam Tr" = "FT")
print(anorexia$Treat)
##  [1] Contr       Contr       Contr       Contr       Contr       Contr      
##  [7] Contr       Contr       Contr       Contr       Contr       Contr      
## [13] Contr       Contr       Contr       Contr       Contr       Contr      
## [19] Contr       Contr       Contr       Contr       Contr       Contr      
## [25] Contr       Contr       Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr
## [31] Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr
## [37] Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr
## [43] Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr
## [49] Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr
## [55] Cogn Beh Tr Fam Tr      Fam Tr      Fam Tr      Fam Tr      Fam Tr     
## [61] Fam Tr      Fam Tr      Fam Tr      Fam Tr      Fam Tr      Fam Tr     
## [67] Fam Tr      Fam Tr      Fam Tr      Fam Tr      Fam Tr      Fam Tr     
## Levels: Cogn Beh Tr Contr Fam Tr

Ejercicio 4.-

a)

Para exportar:

# Load the biopsy dataset and write it out as a CSV file.
data("biopsy", package = "MASS")
write.csv(x = biopsy,
          file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\biopsy.csv")

b)

data("Melanoma", package = "MASS")
# Comma-separated export with a .csv extension.
write.csv(x = Melanoma,
          file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\Melanoma\\melanoma.csv")
# Same comma-separated export, stored with a .txt extension.
write.csv(x = Melanoma,
          file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\Melanoma\\melanoma.txt")
# R data file written with save().
save(list = "Melanoma",
     file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\Melanoma\\melanoma.bin")

Que da como resultado:

c)

library(officer)

# Capture the printed summary of the age column as character lines.
summary_age <- capture.output(summary(Melanoma$age))

# Create a new Word document and append the summary text as one paragraph.
doc <- body_add_par(read_docx(), value = paste(summary_age, collapse = "\n"))

# Write the document to disk.
print(doc, target = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\Melanoma\\age_summary.docx")

d)

El dataset elegido es Mapping Police Violence de https://mappingpoliceviolence.org/

# Read the downloaded CSV file and preview its first rows.
import_data <- read.csv(file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\Mapping Police Violence.csv")
head(import_data, n = 6L)

Ejercicio 5.-

# Load the birthwt dataset from MASS into the workspace.
data("birthwt", package = "MASS")

a)

# Largest value in the age column.
max(birthwt[["age"]])
## [1] 45

b)

# Smallest value in the age column.
min(birthwt[["age"]])
## [1] 14

c)

# Minimum and maximum of the age column.
range(birthwt[["age"]])
## [1] 14 45

d)

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Sort rows by ascending bwt, then test whether the first row has smoke == 1.
query <- arrange(birthwt, bwt)
query$smoke[1] == 1
## [1] TRUE

e)

# Sort rows by descending age and read bwt from the first row.
query2 <- arrange(birthwt, desc(age))
query2$bwt[1]
## [1] 4990

f)

# Keep rows with ftv below 2 and show only the bwt column.
# dplyr:: is spelled out because MASS also exports a select().
dplyr::select(dplyr::filter(birthwt, ftv < 2), bwt)

Ejercicio 6.-

# Two-column matrix (Prewt, Postwt) with one row per patient and no dimnames.
cbind(anorexia$Prewt, anorexia$Postwt, deparse.level = 0)
##       [,1]  [,2]
##  [1,] 80.7  80.2
##  [2,] 89.4  80.1
##  [3,] 91.8  86.4
##  [4,] 74.0  86.3
##  [5,] 78.1  76.1
##  [6,] 88.3  78.1
##  [7,] 87.3  75.1
##  [8,] 75.1  86.7
##  [9,] 80.6  73.5
## [10,] 78.4  84.6
## [11,] 77.6  77.4
## [12,] 88.7  79.5
## [13,] 81.3  89.6
## [14,] 78.1  81.4
## [15,] 70.5  81.8
## [16,] 77.3  77.3
## [17,] 85.2  84.2
## [18,] 86.0  75.4
## [19,] 84.1  79.5
## [20,] 79.7  73.0
## [21,] 85.5  88.3
## [22,] 84.4  84.7
## [23,] 79.6  81.4
## [24,] 77.5  81.2
## [25,] 72.3  88.2
## [26,] 89.0  78.8
## [27,] 80.5  82.2
## [28,] 84.9  85.6
## [29,] 81.5  81.4
## [30,] 82.6  81.9
## [31,] 79.9  76.4
## [32,] 88.7 103.6
## [33,] 94.9  98.4
## [34,] 76.3  93.4
## [35,] 81.0  73.4
## [36,] 80.5  82.1
## [37,] 85.0  96.7
## [38,] 89.2  95.3
## [39,] 81.3  82.4
## [40,] 76.5  72.5
## [41,] 70.0  90.9
## [42,] 80.4  71.3
## [43,] 83.3  85.4
## [44,] 83.0  81.6
## [45,] 87.7  89.1
## [46,] 84.2  83.9
## [47,] 86.4  82.7
## [48,] 76.5  75.7
## [49,] 80.2  82.6
## [50,] 87.8 100.4
## [51,] 83.3  85.2
## [52,] 79.7  83.6
## [53,] 84.5  84.6
## [54,] 80.8  96.2
## [55,] 87.4  86.7
## [56,] 83.8  95.2
## [57,] 83.3  94.3
## [58,] 86.0  91.5
## [59,] 82.5  91.9
## [60,] 86.7 100.3
## [61,] 79.6  76.7
## [62,] 76.9  76.8
## [63,] 94.2 101.6
## [64,] 73.4  94.9
## [65,] 80.5  75.2
## [66,] 81.6  77.8
## [67,] 82.1  95.5
## [68,] 77.6  90.7
## [69,] 83.5  92.5
## [70,] 89.9  93.8
## [71,] 86.0  91.7
## [72,] 87.3  98.0

Ejercicio 7.-

# Build the 25-row example table column by column.
Identificador <- paste0("I", 1:25)
Edad <- c(23, 24, 21, 22, 23, 25, 26, 24, 21, 22, 23, 25, 26, 24, 22, 21, 25,
          26, 24, 21, 25, 27, 26, 22, 29)
# Sex code: 1 for women, 2 for men.
Sexo <- c(1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1,
          1, 2)
Peso <- c(76.5, 81.2, 79.3, 59.5, 67.3, 78.6, 67.9, 100.2, 97.8, 56.4, 65.4,
          67.5, 87.4, 99.7, 87.6, 93.4, 65.4, 73.7, 85.1, 61.2, 54.8, 103.4,
          65.8, 71.7, 85.0)
# Height in cm.
Alt <- c(165, 154, 178, 165, 164, 175, 182, 165, 178, 165, 158, 183, 184, 164,
         189, 167, 182, 179, 165, 158, 183, 184, 189, 166, 175)
Fuma <- c("SÍ", "NO", "SÍ", "SÍ", "NO", "NO", "NO", "SÍ", "SÍ", "SÍ", "NO",
          "NO", "SÍ", "SÍ", "SÍ", "SÍ", "NO", "NO", "SÍ", "SÍ", "SÍ", "NO",
          "SÍ", "NO", "SÍ")
Trat_Pulmon <- data.frame(Identificador = Identificador,
                          Edad = Edad,
                          Sexo = Sexo,
                          Peso = Peso,
                          Alt = Alt,
                          Fuma = Fuma)
print(Trat_Pulmon)

a)

# Rows whose Edad is greater than 22.
dplyr::filter(Trat_Pulmon, Edad > 22)

b)

# Third element of the fourth column.
Trat_Pulmon[[4]][3]
## [1] 79.3

c)

# Rows with Edad below 27, keeping every column except Alt.
Trat_Pulmon[Trat_Pulmon$Edad < 27, names(Trat_Pulmon) != "Alt"]

Ejercicio 8.-

a)

# Load the ChickWeight dataset into the workspace.
data("ChickWeight", package = "datasets")

b)

# Plot weight against observation index, then overlay a reference grid.
with(ChickWeight,
     plot(weight,
          ylab = "Weight (gm)",
          main = "Gráfico de dispersión de la variable weight."))
grid()

c)

# Box plot of the Time column.
with(ChickWeight,
     boxplot(Time,
             ylab = "Time (days since birth)",
             main = "Diagrama de caja de la variable time."))

Ejercicio 9.-

# Undo the changes made to the table in earlier exercises by reloading the
# dataset from MASS. An explicit reload is idempotent, whereas rm() warns when
# no workspace copy exists and then depends on MASS being attached.
data("anorexia", package = "MASS")

# Weight difference per row (Prewt minus Postwt); a negative value means
# Postwt is larger than Prewt.
anorexia_treat_df <- data.frame(anorexia$Treat, Wdif = anorexia$Prewt - anorexia$Postwt)
head(anorexia_treat_df)
# Rows of the "Cont" treatment with a negative difference.
subset(anorexia_treat_df, Wdif < 0 & anorexia.Treat == "Cont")

Ejercicio 10.-

Usuario creado: DataEnjoyer_42. El documento R Markdown que he decidido subir es este.

Caso práctico.-

a)

# Simulated table of 30 people: the first 15 entries get Genero = 2 and the
# last 15 get Genero = 1.
# NOTE(review): there is no set.seed() call, so the sampled values change on
# every run.
Nombres <- c("David", "Alex", "Daniel", "Fulgencio", "Sergi", "Juan", "Jaime",
             "Jorge", "Sergio", "Bob", "Pedro", "Patricio", "Jinwoo", "Akaza", "Fred",
             "Maria", "Paula", "Sofia", "Constancia", "Asuncion", "Asunta", "Dolores",
             "Encarnacion", "Antonella", "Miercoles", "Sol", "Shinobu", "Jane",
             "Berta", "Alba")
# toupper() is vectorised. Calling it directly instead of through sapply()
# keeps Ids unnamed; sapply() over a character vector names each result after
# its input, and data.frame() would then use those names as row names.
Ids <- toupper(ids::random_id(30, 2))
# One fixed age of 29 plus 29 random ages from 18 to 100, shuffled together.
Edades <- sample(c(29, sample(18:100, 29, replace = TRUE)))
Generos <- c(rep(2, 15), rep(1, 15))
Tratamientos <- sample(factor(c("A", "B", "C")), 30, replace = TRUE)
# Weights and heights are drawn separately for the first and the last 15 rows.
Pesos <- c(rnorm(15, mean = 70, sd = 16),
           rnorm(15, mean = 65.5, sd = 14.6))
Estaturas <- c(rnorm(15, mean = 177, sd = 7.55),
               rnorm(15, mean = 162.5, sd = 7))
# Column names are given directly instead of being reassigned afterwards.
conjunto_datos <- data.frame(Nombre = Nombres,
                             Id = Ids,
                             Edad = Edades,
                             Genero = Generos,
                             Tratamiento = Tratamientos,
                             Peso = Pesos,
                             Estatura = Estaturas)
head(conjunto_datos)

b)

# Per-column summary of the simulated table.
print(summary(conjunto_datos))
##     Nombre               Id                 Edad           Genero   
##  Length:30          Length:30          Min.   :20.00   Min.   :1.0  
##  Class :character   Class :character   1st Qu.:39.00   1st Qu.:1.0  
##  Mode  :character   Mode  :character   Median :61.00   Median :1.5  
##                                        Mean   :58.83   Mean   :1.5  
##                                        3rd Qu.:80.25   3rd Qu.:2.0  
##                                        Max.   :97.00   Max.   :2.0  
##  Tratamiento      Peso          Estatura    
##  A:13        Min.   :47.19   Min.   :150.2  
##  B:13        1st Qu.:66.75   1st Qu.:162.6  
##  C: 4        Median :75.25   Median :169.2  
##              Mean   :74.46   Mean   :170.1  
##              3rd Qu.:84.78   3rd Qu.:178.4  
##              Max.   :99.22   Max.   :188.7

c)

# IMC = Peso divided by the square of Estatura / 100.
conjunto_datos$IMC <- with(conjunto_datos, Peso / (Estatura / 100)^2)
head(conjunto_datos$IMC, n = 6L)
## [1] 25.98939 29.53445 23.68213 22.97434 17.95861 23.32790

d)

# Split the table by the Genero code: 2 goes to Df_Hombres, 1 to Df_Mujeres.
Df_Hombres <- conjunto_datos[conjunto_datos$Genero == 2, ]
print(Df_Hombres)
Df_Mujeres <- conjunto_datos[conjunto_datos$Genero == 1, ]
print(Df_Mujeres)

e)

# Stack the two subsets back into a single table, Df_Hombres rows first.
conjunto_datos_fusionado <- do.call(rbind, list(Df_Hombres, Df_Mujeres))
print(conjunto_datos_fusionado)