Utilizando las funciones citadas en este Laboratorio, comprobad qué paquetes tenéis instalados en vuestra versión de RStudio e instalad el paquete MASS y el paquete Survival y comprobad la información que contienen. Buscad información sobre el paquete Rcmdr (R Commander) desde la consola.
Para saber qué paquetes hay instalados se hace con:
# installed.packages() lists every package installed in the library paths,
# which is what the exercise asks for. sessionInfo() only reports the R
# version, platform and the packages attached or loaded in this session, so
# it is kept last: the output below belongs to it.
rownames(installed.packages())
sessionInfo()
## R version 4.5.2 (2025-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=Spanish_Spain.utf8 LC_CTYPE=Spanish_Spain.utf8
## [3] LC_MONETARY=Spanish_Spain.utf8 LC_NUMERIC=C
## [5] LC_TIME=Spanish_Spain.utf8
##
## time zone: Europe/Madrid
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.39 R6_2.6.1 fastmap_1.2.0 xfun_0.56
## [5] cachem_1.1.0 knitr_1.51 htmltools_0.5.9 rmarkdown_2.30
## [9] lifecycle_1.0.5 cli_3.6.5 sass_0.4.10 jquerylib_0.1.4
## [13] compiler_4.5.2 tools_4.5.2 evaluate_1.0.5 bslib_0.10.0
## [17] yaml_2.3.12 rlang_1.1.7 jsonlite_2.0.0
Para la instalación se puede hacer:
#install.packages("MASS")
#install.packages("survival")
Y para la información sobre Rcmdr:
# Search the installed help pages for the term "Rcmdr"
# (long form of the `??` shortcut).
help.search("Rcmdr")
## starting httpd help server ... done
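Nos dice que sirve para invocar comandos de las herramientas de CMD desde R. Nota: esa descripción corresponde probablemente a la función Rcmd del paquete tools, devuelta por la búsqueda por coincidencia aproximada; Rcmdr (R Commander) es, en realidad, una interfaz gráfica de usuario para R.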
Importad un archivo de texto y buscad un summary() de tres variables que escojáis.
# Import the iris text file (comma-separated) into a data frame.
iris_data <- read.csv(
  file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\Iris.txt"
)
# Preview the first six rows.
head(iris_data, n = 6L)
# Summarise the three chosen variables.
summary(iris_data[c("SepalLengthCm", "PetalLengthCm", "Species")])
## SepalLengthCm PetalLengthCm Species
## Min. :4.300 Min. :1.000 Length:150
## 1st Qu.:5.100 1st Qu.:1.600 Class :character
## Median :5.800 Median :4.350 Mode :character
## Mean :5.843 Mean :3.759
## 3rd Qu.:6.400 3rd Qu.:5.100
## Max. :7.900 Max. :6.900
Importad un archivo «.csv» y buscad un fivenum() de dos variables que os parezcan relevantes para el estudio.
# Import the mtcars CSV file into a data frame and preview the first six rows.
mtcars_data <- read.csv(
  file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\mtcars.csv"
)
head(mtcars_data, n = 6L)
# Tukey five-number summary (min, lower hinge, median, upper hinge, max)
# of the two chosen variables.
fivenum(mtcars_data[["mpg"]])
## [1] 10.40 15.35 19.20 22.80 33.90
fivenum(mtcars_data[["hp"]])
## [1] 52 96 123 180 335
# Attach MASS, load its anorexia data set into the workspace and preview it.
library(MASS)
data(anorexia, package = "MASS")
head(anorexia, n = 6L)
Los tipos de datos de las variables son:
# Report the (first) class of every column. vapply() with a declared
# character(1) result is type-stable, whereas sapply() would silently return
# a list if any column carried more than one class.
vapply(anorexia, function(column) class(column)[1L], character(1))
## Treat Prewt Postwt
## "factor" "numeric" "numeric"
Para saber si hay NAs o NULLs:
# TRUE if any cell of the data frame is NA.
anyNA(anorexia)
## [1] FALSE
# is.null() applied to the data frame itself only tests the container, which
# is never NULL here, so that check could not detect anything. Test every
# column individually instead.
any(vapply(anorexia, is.null, logical(1)))
## [1] FALSE
# Relabel the treatment factor: the original codes CBT / Cont / FT are
# replaced, in that order, by more descriptive labels.
anorexia[["Treat"]] <- factor(
  anorexia[["Treat"]],
  levels = c("CBT", "Cont", "FT"),
  labels = c("Cogn Beh Tr", "Contr", "Fam Tr")
)
# Show the relabelled factor.
print(anorexia[["Treat"]])
## [1] Contr Contr Contr Contr Contr Contr
## [7] Contr Contr Contr Contr Contr Contr
## [13] Contr Contr Contr Contr Contr Contr
## [19] Contr Contr Contr Contr Contr Contr
## [25] Contr Contr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr
## [31] Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr
## [37] Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr
## [43] Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr
## [49] Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr Cogn Beh Tr
## [55] Cogn Beh Tr Fam Tr Fam Tr Fam Tr Fam Tr Fam Tr
## [61] Fam Tr Fam Tr Fam Tr Fam Tr Fam Tr Fam Tr
## [67] Fam Tr Fam Tr Fam Tr Fam Tr Fam Tr Fam Tr
## Levels: Cogn Beh Tr Contr Fam Tr
Para exportar:
# Load the biopsy data set and export it as a CSV file.
# write.csv() writes the row names as the first column by default.
data(biopsy, package = "MASS")
write.csv(
  x = biopsy,
  file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\biopsy.csv"
)
# Load the Melanoma data set and export it in three formats.
data(Melanoma, package = "MASS")
# CSV file.
write.csv(
  x = Melanoma,
  file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\Melanoma\\melanoma.csv"
)
# Text file: same comma-separated layout, written with a .txt extension.
write.csv(
  x = Melanoma,
  file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\Melanoma\\melanoma.txt"
)
# Binary file in R's save() format.
save(
  list = "Melanoma",
  file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\Melanoma\\melanoma.bin"
)
Que da como resultado:
library(officer)

# Capture the printed summary of the age variable, one string per output line.
summary_age <- capture.output(summary(Melanoma$age))

# Create an empty Word document from officer's default template.
doc <- read_docx()

# Add every captured line as its own paragraph. Collapsing the lines with
# "\n" into a single paragraph does not produce a line break in the rendered
# document, so the header and the values would run together.
for (summary_line in summary_age) {
  doc <- body_add_par(doc, summary_line)
}

# Write the document to disk.
print(doc, target = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\Melanoma\\age_summary.docx")
El dataset elegido es Mapping Police Violence de https://mappingpoliceviolence.org/
# Import the Mapping Police Violence CSV file and preview the first six rows.
import_data <- read.csv(
  file = "C:\\Users\\David\\Desktop\\UOC\\programari_per_analisi_dades\\LAB1\\Mapping Police Violence.csv"
)
head(import_data, n = 6L)
# Load the birthwt data set and report the largest age, the smallest age and
# both at once.
data(birthwt, package = "MASS")
max(birthwt[["age"]])
## [1] 45
min(birthwt[["age"]])
## [1] 14
range(birthwt[["age"]])
## [1] 14 45
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Sort the records by ascending bwt so that the first row has the lowest
# value, then check whether that row's smoke flag equals 1.
query <- arrange(birthwt, bwt)
query$smoke[1] == 1
## [1] TRUE
# Sort the records by descending age so that the first row has the highest
# age, then read that row's bwt.
query2 <- arrange(birthwt, desc(age))
query2$bwt[1]
## [1] 4990
# bwt of the records whose ftv value is below 2. The dplyr verbs are
# namespace-qualified because MASS also exports a select().
birthwt |>
  dplyr::filter(ftv < 2) |>
  dplyr::select(bwt)
# Two-column matrix: Prewt in column 1 and Postwt in column 2, one row per
# record. deparse.level = 0 keeps the columns unnamed.
cbind(anorexia$Prewt, anorexia$Postwt, deparse.level = 0)
## [,1] [,2]
## [1,] 80.7 80.2
## [2,] 89.4 80.1
## [3,] 91.8 86.4
## [4,] 74.0 86.3
## [5,] 78.1 76.1
## [6,] 88.3 78.1
## [7,] 87.3 75.1
## [8,] 75.1 86.7
## [9,] 80.6 73.5
## [10,] 78.4 84.6
## [11,] 77.6 77.4
## [12,] 88.7 79.5
## [13,] 81.3 89.6
## [14,] 78.1 81.4
## [15,] 70.5 81.8
## [16,] 77.3 77.3
## [17,] 85.2 84.2
## [18,] 86.0 75.4
## [19,] 84.1 79.5
## [20,] 79.7 73.0
## [21,] 85.5 88.3
## [22,] 84.4 84.7
## [23,] 79.6 81.4
## [24,] 77.5 81.2
## [25,] 72.3 88.2
## [26,] 89.0 78.8
## [27,] 80.5 82.2
## [28,] 84.9 85.6
## [29,] 81.5 81.4
## [30,] 82.6 81.9
## [31,] 79.9 76.4
## [32,] 88.7 103.6
## [33,] 94.9 98.4
## [34,] 76.3 93.4
## [35,] 81.0 73.4
## [36,] 80.5 82.1
## [37,] 85.0 96.7
## [38,] 89.2 95.3
## [39,] 81.3 82.4
## [40,] 76.5 72.5
## [41,] 70.0 90.9
## [42,] 80.4 71.3
## [43,] 83.3 85.4
## [44,] 83.0 81.6
## [45,] 87.7 89.1
## [46,] 84.2 83.9
## [47,] 86.4 82.7
## [48,] 76.5 75.7
## [49,] 80.2 82.6
## [50,] 87.8 100.4
## [51,] 83.3 85.2
## [52,] 79.7 83.6
## [53,] 84.5 84.6
## [54,] 80.8 96.2
## [55,] 87.4 86.7
## [56,] 83.8 95.2
## [57,] 83.3 94.3
## [58,] 86.0 91.5
## [59,] 82.5 91.9
## [60,] 86.7 100.3
## [61,] 79.6 76.7
## [62,] 76.9 76.8
## [63,] 94.2 101.6
## [64,] 73.4 94.9
## [65,] 80.5 75.2
## [66,] 81.6 77.8
## [67,] 82.1 95.5
## [68,] 77.6 90.7
## [69,] 83.5 92.5
## [70,] 89.9 93.8
## [71,] 86.0 91.7
## [72,] 87.3 98.0
# Build the 25-row Trat_Pulmon table column by column.

# Identifiers "I1" ... "I25".
Identificador <- paste0("I", 1:25)

Edad <- c(
  23, 24, 21, 22, 23, 25, 26, 24, 21, 22, 23, 25, 26,
  24, 22, 21, 25, 26, 24, 21, 25, 27, 26, 22, 29
)

# Sex code: 1 for women, 2 for men.
Sexo <- c(
  1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2,
  2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2
)

Peso <- c(
  76.5, 81.2, 79.3, 59.5, 67.3, 78.6, 67.9, 100.2, 97.8,
  56.4, 65.4, 67.5, 87.4, 99.7, 87.6, 93.4, 65.4, 73.7,
  85.1, 61.2, 54.8, 103.4, 65.8, 71.7, 85.0
)

# Height in cm.
Alt <- c(
  165, 154, 178, 165, 164, 175, 182, 165, 178, 165, 158, 183, 184,
  164, 189, 167, 182, 179, 165, 158, 183, 184, 189, 166, 175
)

Fuma <- c(
  "SÍ", "NO", "SÍ", "SÍ", "NO", "NO", "NO", "SÍ", "SÍ", "SÍ",
  "NO", "NO", "SÍ", "SÍ", "SÍ", "SÍ", "NO", "NO", "SÍ", "SÍ",
  "SÍ", "NO", "SÍ", "NO", "SÍ"
)

# Assemble the data frame, with columns in the same order as the vectors
# above, and display it.
Trat_Pulmon <- data.frame(
  Identificador = Identificador,
  Edad = Edad,
  Sexo = Sexo,
  Peso = Peso,
  Alt = Alt,
  Fuma = Fuma
)
print(Trat_Pulmon)
# Rows with Edad above 22.
dplyr::filter(Trat_Pulmon, Edad > 22)
# Third row of the fourth column (Peso).
Trat_Pulmon[3, "Peso"]
## [1] 79.3
# Rows with Edad below 27, leaving out the Alt column.
subset(Trat_Pulmon, subset = Edad < 27, select = -Alt)
data("ChickWeight")

# Scatter plot of weight against observation index, with a reference grid.
plot(
  x = ChickWeight[["weight"]],
  ylab = "Weight (gm)",
  main = "Gráfico de dispersión de la variable weight."
)
grid()

# Box plot of the Time variable.
boxplot(
  x = ChickWeight[["Time"]],
  ylab = "Time (days since birth)",
  main = "Diagrama de caja de la variable time."
)
# Discard the changes made to the table in earlier exercises by reloading the
# original copy from MASS. Reloading is explicit and, unlike rm(), does not
# warn when no modified copy exists in the workspace and does not depend on
# the search path to find the data again.
data("anorexia", package = "MASS")

# Treatment plus weight difference (pre minus post). A negative Wdif means
# the post-treatment weight is higher than the pre-treatment weight.
anorexia_treat_df <- data.frame(
  anorexia.Treat = anorexia$Treat,
  Wdif = anorexia$Prewt - anorexia$Postwt
)
head(anorexia_treat_df)

# Rows of the "Cont" group with a negative difference, selected in one
# subset() call instead of two nested ones.
subset(anorexia_treat_df, Wdif < 0 & anorexia.Treat == "Cont")
Usuario creado: DataEnjoyer_42. El documento R Markdown que he decidido subir es este.
# Simulate a 30-row data set of named subjects.

# Fix the RNG seed so the simulated values, and every number derived from
# them, are the same each time the document is rendered.
set.seed(42)

Nombres <- c(
  "David", "Alex", "Daniel", "Fulgencio", "Sergi", "Juan", "Jaime",
  "Jorge", "Sergio", "Bob", "Pedro", "Patricio", "Jinwoo", "Akaza", "Fred",
  "Maria", "Paula", "Sofia", "Constancia", "Asuncion", "Asunta", "Dolores",
  "Encarnacion", "Antonella", "Miercoles", "Sol", "Shinobu", "Jane",
  "Berta", "Alba"
)

# 30 random 2-byte hexadecimal identifiers in upper case.
# - toupper() is vectorised, so no sapply() wrapper is needed; the wrapper
#   also attached the lowercase ids as names, which data.frame() can pick up
#   as row names.
# - Per the ids documentation, random_id() draws from openssl by default,
#   which set.seed() does not control; use_openssl = FALSE makes it use R's
#   own generator.
Ids <- toupper(ids::random_id(30, 2, use_openssl = FALSE))

# One fixed age of 29 plus 29 random ages between 18 and 100, shuffled.
Edades <- sample(c(29, sample(18:100, 29, replace = TRUE)))

# First 15 rows coded 2, last 15 coded 1 (the file later uses 2 for
# Df_Hombres and 1 for Df_Mujeres).
Generos <- rep(c(2, 1), each = 15)

# Random treatment arm A/B/C for each row.
Tratamientos <- sample(factor(c("A", "B", "C")), 30, replace = TRUE)

# Normally distributed weights and heights, with separate parameters for the
# first and the last 15 rows.
Pesos <- c(
  rnorm(15, mean = 70, sd = 16),
  rnorm(15, mean = 65.5, sd = 14.6)
)
Estaturas <- c(
  rnorm(15, mean = 177, sd = 7.55),
  rnorm(15, mean = 162.5, sd = 7)
)

# Name the columns when the data frame is built instead of renaming them
# afterwards.
conjunto_datos <- data.frame(
  Nombre = Nombres,
  Id = Ids,
  Edad = Edades,
  Genero = Generos,
  Tratamiento = Tratamientos,
  Peso = Pesos,
  Estatura = Estaturas
)

head(conjunto_datos)
summary(conjunto_datos)
## Nombre Id Edad Genero
## Length:30 Length:30 Min. :20.00 Min. :1.0
## Class :character Class :character 1st Qu.:39.00 1st Qu.:1.0
## Mode :character Mode :character Median :61.00 Median :1.5
## Mean :58.83 Mean :1.5
## 3rd Qu.:80.25 3rd Qu.:2.0
## Max. :97.00 Max. :2.0
## Tratamiento Peso Estatura
## A:13 Min. :47.19 Min. :150.2
## B:13 1st Qu.:66.75 1st Qu.:162.6
## C: 4 Median :75.25 Median :169.2
## Mean :74.46 Mean :170.1
## 3rd Qu.:84.78 3rd Qu.:178.4
## Max. :99.22 Max. :188.7
# Body mass index: Peso divided by the square of Estatura, with Estatura
# divided by 100 first.
conjunto_datos$IMC <- with(conjunto_datos, Peso / (Estatura / 100)^2)
head(conjunto_datos$IMC, n = 6L)
## [1] 25.98939 29.53445 23.68213 22.97434 17.95861 23.32790
# Split the data set by gender code (2 -> Df_Hombres, 1 -> Df_Mujeres).
# which() drops NA comparisons, matching how subset() treats them.
Df_Hombres <- conjunto_datos[which(conjunto_datos$Genero == 2), ]
print(Df_Hombres)

Df_Mujeres <- conjunto_datos[which(conjunto_datos$Genero == 1), ]
print(Df_Mujeres)

# Stack both groups back into one data frame, Df_Hombres first.
conjunto_datos_fusionado <- rbind(Df_Hombres, Df_Mujeres)
print(conjunto_datos_fusionado)