# Package setup ----
# Pin a CRAN mirror so that installation also works in non-interactive runs,
# where R cannot prompt for one.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Install only the packages that are not yet available. Calling
# install.packages() unconditionally re-downloads every package on each run
# and fails when there is no network connection.
paquetes_base <- c("ggplot2", "reshape2", "GGally", "readxl")
paquetes_faltantes <- paquetes_base[
  !vapply(paquetes_base, requireNamespace, logical(1), quietly = TRUE)
]
if (length(paquetes_faltantes) > 0) {
  install.packages(paquetes_faltantes)
}

# Plotting, matrix reshaping (melt) and Excel import.
library(ggplot2)
library(reshape2)
library(GGally)
library(readxl)

# The example data frame comes from an Excel workbook: download the data file
# and copy its path into the read_excel() call that follows.
# Read the fertility data from a local Excel workbook (the source is an .xlsx
# file on disk, not a CSV or a URL). The first sheet row supplies the column
# names.
data <- readxl::read_excel(
  path = "C://Users//USUARIO//Downloads//fertility.xlsx",
  col_names = TRUE
)

# Preview the first rows of the imported table. In the captured run this was
# a tibble with 10 columns, of which Age and n_horas_sentado_dia were <dbl>.
head(x = data)
# Keep only the numeric columns.
# vapply() always returns a logical vector (sapply() can silently change its
# return type), and is.numeric() accepts integer as well as double columns
# while rejecting date-time columns, which are stored as doubles but cannot be
# passed to cor().
es_numerica <- vapply(data, is.numeric, logical(1))

# drop = FALSE keeps the result a data frame even if only one column matches.
datos_numericos <- data[, es_numerica, drop = FALSE]
datos_numericos
# Correlation matrix of the numeric columns (cor() defaults to Pearson).
matriz_correlacion <- cor(datos_numericos)

# Reshape the square matrix into long format (Var1, Var2, value) so that each
# pair of variables becomes one row that ggplot2 can draw as a tile.
cor_data <- melt(matriz_correlacion)

# Heatmap of the correlations with the rounded coefficient printed on each
# tile; the diverging palette is centred on 0.
ggplot(data = cor_data, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = round(value, 2)), color = "black", size = 4) +
  scale_fill_gradient2(
    low = "red", high = "blue", mid = "white",
    midpoint = 0,
    # Spelled out in full: the abbreviated `limit` relied on partial argument
    # matching to reach the scale's `limits` argument.
    limits = c(-1, 1),
    space = "Lab",
    name = "Correlation"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)
  ) +
  # Square tiles: one unit on x equals one unit on y.
  coord_fixed()

# Histogram of Age using 2-unit-wide bins.
ggplot(data = data, mapping = aes(x = Age)) +
  geom_histogram(
    binwidth = 2,
    colour = "white",
    fill = "steelblue"
  ) +
  labs(
    x = "Edad",
    y = "Frecuencia",
    title = "Distribución de Edad"
  ) +
  theme_minimal()

# 2. Boxplot of Age by Diagnosis, with one box per value of
# enfermedades_infantiles inside each diagnosis.
# (A stray install.packages("readxl") was removed from here: the package is
# already loaded at this point, so the call only produced a warning, and it is
# not needed for plotting.)
ggplot(data, aes(x = Diagnosis, y = Age, fill = enfermedades_infantiles)) +
  geom_boxplot() +
  labs(title = "Diagnostico segun edad", x = "Diagnostico", y = "EDAD") +
  theme_light()

# dplyr supplies group_by() and summarise(). Attaching it masks
# stats::filter/lag and base::intersect/setdiff/setequal/union.
library(dplyr)

# Mean Age per Diagnosis value, ignoring missing ages.
bar_data <- summarise(
  group_by(data, Diagnosis),
  PromedioEdad = mean(Age, na.rm = TRUE)
)

# Bar chart of the pre-computed means: geom_col() draws bar heights directly
# from the y values.
ggplot(data = bar_data, mapping = aes(x = Diagnosis, y = PromedioEdad)) +
  geom_col() +
  labs(
    title = "Promedio de Edad por Estado de Fertilidad",
    x = "Estado de Fertilidad",
    y = "Edad Promedio"
  ) +
  theme_minimal()

# Install patchwork and gtable only when they are missing, instead of
# re-downloading them on every run of the script.
paquetes_composicion <- c("patchwork", "gtable")
composicion_faltantes <- paquetes_composicion[
  !vapply(paquetes_composicion, requireNamespace, logical(1), quietly = TRUE)
]
if (length(composicion_faltantes) > 0) {
  install.packages(composicion_faltantes)
}

# Load the libraries used by the composite figure. Re-attaching ggplot2 and
# dplyr is a no-op if they are already loaded, and keeps this section
# runnable on its own.
library(ggplot2)
library(dplyr)
library(patchwork)
# Panel 1: histogram of Age in 2-unit-wide bins.
g1 <- ggplot(data = data, mapping = aes(x = Age)) +
  geom_histogram(binwidth = 2, colour = "black", fill = "skyblue") +
  labs(x = "Edad", y = "Frecuencia", title = "Distribución de Edad") +
  theme_minimal()

# Panel 2: scatter plot of sitting hours against Age with a fitted straight
# line (linear model, no confidence band). geom_smooth() reports the formula
# it picked, y ~ x, as a message when the plot is drawn.
g2 <- ggplot(data = data, mapping = aes(x = Age, y = n_horas_sentado_dia)) +
  geom_point(alpha = 0.6, colour = "darkred") +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  labs(
    x = "Edad",
    y = "Horas sentado",
    title = "Edad vs Numero de horas sentado"
  ) +
  theme_classic()

# Panel 3: mean Age per Diagnosis value, one coloured bar per group.
bar_data <- summarise(
  group_by(data, Diagnosis),
  PromedioEdad = mean(Age, na.rm = TRUE)
)
g3 <- ggplot(
  data = bar_data,
  mapping = aes(x = Diagnosis, y = PromedioEdad, fill = Diagnosis)
) +
  geom_col() +
  labs(x = "Estado", y = "Edad Promedio", title = "Edad Promedio por Estado") +
  theme_light()

# Composite image: the three panels stacked in a single column.
g1 + g2 + g3 +
  plot_layout(ncol = 1)
