CAPACITACIÓN

#install.packages("ggplot2")
#devtools::install_github("centromagis/paqueteMETODOS") #descarga paquete paqueteMETODOS
library(ggplot2)
#install.packages("waffle")
library(waffle)
#install.packages("devtools")     # solo una vez
#install.packages("plotly")
library(ggplot2)
library(plotly)

## 
## Adjuntando el paquete: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(paqueteMETODOS)

## Cargando paquete requerido: cubature

## Cargando paquete requerido: dplyr

## 
## Adjuntando el paquete: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

## Cargando paquete requerido: flextable

## 
## Adjuntando el paquete: 'flextable'

## The following objects are masked from 'package:plotly':
## 
##     highlight, style

## Cargando paquete requerido: lmtest

## Cargando paquete requerido: zoo

## 
## Adjuntando el paquete: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Cargando paquete requerido: MASS

## 
## Adjuntando el paquete: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## The following object is masked from 'package:plotly':
## 
##     select

## Cargando paquete requerido: psych

## 
## Adjuntando el paquete: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## Cargando paquete requerido: summarytools

## Cargando paquete requerido: randtests

## Cargando paquete requerido: rapportools

## 
## Adjuntando el paquete: 'rapportools'

## The following objects are masked from 'package:summarytools':
## 
##     label, label<-

## The following object is masked from 'package:dplyr':
## 
##     n

## The following objects are masked from 'package:stats':
## 
##     IQR, median, sd, var

## The following objects are masked from 'package:base':
## 
##     max, mean, min, range, sum

Creación de datos.

# Fix the RNG seed so the three permutations below are reproducible.
set.seed(123)

# Each group is a random permutation of 1..4 (sampling without replacement).
# The draws are kept in this order because each one advances the RNG state.
Grupo1 <- sample(1:4)
Grupo2 <- sample(1:4)
Grupo3 <- sample(1:4)

# One column per group; the bare name on the last line autoprints the table.
tabla <- data.frame(Grupo1, Grupo2, Grupo3)
tabla

##   Grupo1 Grupo2 Grupo3
## 1      3      3      3
## 2      4      2      1
## 3      1      4      2
## 4      2      1      4

Gráfico

# Stacked bar chart: one bar per group (column of `tabla`), with one coloured
# segment per row. The title announces stacked bars, so `beside` must be FALSE;
# `beside = TRUE` would draw the segments side by side (grouped bars).
barplot(as.matrix(tabla),
        beside = FALSE,  # FALSE = stacked segments, TRUE = juxtaposed bars
        main = "Gráfico de barras apiladas",
        xlab = "Grupos del eje x",
        ylab = "Cantidad",
        axes = TRUE,     # draw the value axis (scale)
        col = c("lightblue", "blue", "#FED976", "red"))

Creo una base de datos con dos variables: grupo y valor

# Small demo data set: five labelled groups, each with a signed value.
df2 <- data.frame(
  grupo = paste("Grupo", LETTERS[1:5]),
  valor = c(1.5, -1.2, 0.8, -0.5, 1.0)
)
df2

##     grupo valor
## 1 Grupo A   1.5
## 2 Grupo B  -1.2
## 3 Grupo C   0.8
## 4 Grupo D  -0.5
## 5 Grupo E   1.0

# Per-row fill colour: "red" for negative values, "green" otherwise.
color <- ifelse(df2$valor >= 0, "green", "red")

Creo la gráfica

# Horizontal bar chart of `valor` per group, with groups ordered by value.
ggplot(df2, aes(x = reorder(grupo, valor), y = valor)) +
  geom_col(
    fill = color,     # fill colours, one per row of df2
    color = "black"   # bar outline
  ) +
  labs(title = "ejes", x = "Grupo", y = "Valor") +
  coord_flip() +      # swap the axes so the bars run horizontally
  theme_minimal()     # plain, uncluttered background

Otro tipo de gráfico

# Same five groups and signed values, reused for the gradient-filled chart.
grupo_labels <- NULL
df3 <- data.frame(
  grupo = sprintf("Grupo %s", c("A", "B", "C", "D", "E")),
  valor = c(1.5, -1.2, 0.8, -0.5, 1.0)
)
rm(grupo_labels)

Gráfico

# Bar chart of `valor` per group (ordered by value); the fill follows the value
# on a diverging low/mid/high gradient and the legend is hidden.
ggplot(df3, aes(x = reorder(grupo, valor), y = valor)) +
  geom_col(
    aes(fill = valor),   # fill mapped to the value
    color = "black",     # bar outline
    show.legend = FALSE
  ) +
  labs(x = "Grupo", y = "Valor") +
  scale_fill_gradient2(
    low = "#F4A460",
    mid = "aliceblue",
    high = "#6495ED"
  ) +
  theme_minimal()

Otro gráfico

# Named counts S1..S5, drawn as a waffle chart with 5 rows and the legend below.
d <- setNames(c(30, 25, 20, 5, 12), paste0("S", 1:5))
waffle(d, rows = 5, legend_pos = "bottom")

## S1 S2 S3 S4 S5 
## 30 25 20  5 12

Cargar datos

# Load the CarreraLuz22 data set into the workspace and preview its first rows.
data(CarreraLuz22)
head(CarreraLuz22)

## # A tibble: 6 × 7
##      id sex     edad origen  timerun categoria      nacionalidad
##   <dbl> <chr>  <int> <chr>     <dbl> <chr>          <chr>       
## 1     1 Hombre    22 Sevilla    1988 2. Abierta     COL         
## 2     2 Hombre    31 Sevilla    2041 2. Abierta     COL         
## 3     3 Hombre    31 Yumbo      2083 2. Abierta     COL         
## 4     4 Hombre    36 Cali       2112 2. Abierta     COL         
## 5     5 Hombre    32 Cali       2213 2. Abierta     COL         
## 6     6 Hombre    48 Palmira    2238 3. Veteranos A COL

#agregar conteo

Gráfico de barras con género y categoría

# Athletes per category, split by sex (stacked bars).
barras <- ggplot(CarreraLuz22, aes(x = categoria, fill = sex)) +
  geom_bar(position = "stack", width = 0.7, color = "black") +
  # custom palette instead of the default colours
  scale_fill_manual(values = c("#0d3b66", "#f4d35e")) +
  labs(
    title = "Número de atletas por categoría y sexo",
    x = "Categorías",
    y = "Número de atletas"
  ) +
  theme_minimal()

# Static plot first, then the interactive plotly version.
print(barras)

ggplotly(barras)

Gráfico de cajas para categoría y sexo.

# Box plots of timerun / 60 (labelled as minutes) by sex, one box per category.
boxplotcate <- ggplot(
  CarreraLuz22,
  aes(x = sex, y = timerun / 60, fill = categoria)
) +
  geom_boxplot() +
  labs(
    title = "Distribución del tiempo de carrera La Luz 2022",
    x = "Sexo",
    y = "Tiempo (min)"
  ) +
  theme_minimal()
boxplotcate

Gráfico de caja para ver la distribución del tiempo por género

# Box plot of timerun / 60 (labelled as minutes) per sex, with a manual
# two-colour fill; shown as an interactive plotly widget.
tiempo <- ggplot(
  CarreraLuz22,
  aes(x = sex, y = timerun / 60, fill = sex)
) +
  geom_boxplot() +
  scale_fill_manual(values = c("blue", "pink")) +
  labs(
    title = "Distribución del tiempo de carrera La Luz 2022",
    x = "Sexo",
    y = "Tiempo (min)"
  ) +
  theme_minimal()
ggplotly(tiempo)

Relación tiempo - género

# Scatter plot of age against timerun / 60, with one panel per value of `sex`.
t <- ggplot(CarreraLuz22, aes(x = timerun / 60, y = edad)) +
  geom_point() +
  facet_wrap(~ sex) # split into panels by the values of `sex`
ggplotly(t)

# Histogram of timerun / 60 for the rows where sex is "Hombre" and the
# category is "4. Veteranos B".
data("CarreraLuz22")
dataFc4 <- subset(
  CarreraLuz22,
  sex == "Hombre" & categoria == "4. Veteranos B"
)
z <- ggplot(dataFc4, aes(x = timerun / 60)) +
  geom_histogram(
    bins = 8,
    fill = "blue",
    color = "black",
    alpha = 0.7
  ) +
  labs(
    title = "Distribución de los tiempos hombres Veteranos B",
    x = "Tiempo (min)",
    y = "Frecuencia"
  ) +
  theme_minimal()
ggplotly(z)

Variables nominales (Encuesta)

# Bar chart of survey answers: one bar per rating category.
conteo <- c(5, 13, 30, 52, 35)
categorias <- c("Muy regular", "Regular", "Bueno", "Muy bueno", "Excelente")
# Factor levels follow the order of `categorias`, so the bars keep that order.
conteo_data <- data.frame(
  Categoria = factor(categorias, levels = categorias),
  Frecuencia = conteo
)


plot <- ggplot(
  conteo_data,
  aes(x = Categoria, y = Frecuencia, fill = Categoria)
) +
  geom_col(width = 0.8, color = "black") +
  scale_fill_manual(
    values = c("#f95738", "#ee964b", "#f4d35e", "#faf0ca", "#0d3b66")
  ) +
  labs(
    title = "Evaluacion proceso de induccion",
    x = "Categoria",
    y = "Frecuencia"
  ) +
  theme_minimal() +
  theme(legend.position = "none") # hide the legend; the x axis already names each bar
ggplotly(plot)

Tabla de frecuencia de los datos

# Frequency table: expand each rating label by its count, then tabulate.
conteo <- rep(
  c("1. Muy regular", "2. Regular", "3. Bueno", "4. Muy bueno", "5. Excelente"),
  times = c(5, 13, 30, 52, 35)
)
t1 <- table(conteo)
t1

## conteo
## 1. Muy regular     2. Regular       3. Bueno   4. Muy bueno   5. Excelente 
##              5             13             30             52             35

# Frequency table of `conteo` with percentage and cumulative-percentage columns.
summarytools::freq(conteo)

## Frequencies  
## conteo  
## Type: Character  
## 
##                        Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## -------------------- ------ --------- -------------- --------- --------------
##       1. Muy regular      5      3.70           3.70      3.70           3.70
##           2. Regular     13      9.63          13.33      9.63          13.33
##             3. Bueno     30     22.22          35.56     22.22          35.56
##         4. Muy bueno     52     38.52          74.07     38.52          74.07
##         5. Excelente     35     25.93         100.00     25.93         100.00
##                 <NA>      0                               0.00         100.00
##                Total    135    100.00         100.00    100.00         100.00

Tabla

##Ejercicio tomado de CENTROMAGIS
library(formattable)

## 
## Adjuntando el paquete: 'formattable'

## The following object is masked from 'package:rapportools':
## 
##     percent

## The following object is masked from 'package:MASS':
## 
##     area

## The following object is masked from 'package:flextable':
## 
##     style

## The following object is masked from 'package:plotly':
## 
##     style

# Example roster: ten students with age, semester and three partial grades.
df <- data.frame(
  id = seq_len(10),
  nombre = c(
    "Juan", "Carlos", "James", "David", "Jenny",
    "Harold", "Leonardo", "John", "Ana", "Luz"
  ),
  edad = c(25, 27, 30, 28, 29, 29, 35, 27, 31, 30),
  Semestre = c("5", "8", "1", "6", "3", "7", "9", "10", "2", "3"),
  Corte1 = c(8.9, 9.5, 9.6, 8.9, 9.1, 9.3, 9.3, 9.9, 8.5, 8.6),
  Corte2 = c(9.1, 9.1, 9.2, 9.1, 8.9, 8.5, 9.2, 9.3, 9.1, 8.8),
  Corte3 = c(8.9, 9.5, 9.6, 8.9, 8.9, 8.5, 9.2, 9.3, 9.1, 8.8)
)

# Row-wise mean of the three grade columns.
df[["Promedio"]] <- rowMeans(df[c("Corte1", "Corte2", "Corte3")])
# Render `df` as a formatted table with per-column formatters:
#  - edad: colour tile running from white to orange
#  - Corte1, Corte2, Corte3: treated as one area sharing a white-to-lightblue
#    colour tile (0.4 is forwarded to color_tile as a third argument)
#  - Promedio: shown as "<value> (rank: NN)", where the rank comes from
#    rank(-x); the text is green when that rank is <= 3 and gray otherwise
# NOTE(review): rank() averages tied values by default, so a tie can yield a
# fractional rank, which the "%02d" format cannot print - confirm this cannot
# happen with the data used here.
formattable(df, list(
  edad = color_tile("white", "orange"),
  area(col = c(Corte1, Corte2, Corte3)) ~ color_tile("white", "lightblue", 0.4),
  Promedio = formatter("span",
                       style = x ~ style(color = ifelse(rank(-x) <= 3, "green", "gray")),
                       x ~ sprintf("%.2f (rank: %02d)", x, rank(-x)))
))

id	nombre	edad	Semestre	Corte1	Corte2	Corte3	Promedio
1	Juan	25	5	8.9	9.1	8.9	8.97 (rank: 06)
2	Carlos	27	8	9.5	9.1	9.5	9.37 (rank: 03)
3	James	30	1	9.6	9.2	9.6	9.47 (rank: 02)
4	David	28	6	8.9	9.1	8.9	8.97 (rank: 06)
5	Jenny	29	3	9.1	8.9	8.9	8.97 (rank: 06)
6	Harold	29	7	9.3	8.5	8.5	8.77 (rank: 09)
7	Leonardo	35	9	9.3	9.2	9.2	9.23 (rank: 04)
8	John	27	10	9.9	9.3	9.3	9.50 (rank: 01)
9	Ana	31	2	8.5	9.1	9.1	8.90 (rank: 08)
10	Luz	30	3	8.6	8.8	8.8	8.73 (rank: 10)

# Draw `n` uniform random numbers between `min_val` and `max_val` and round
# them to `decimal_places` decimals.
#
# n              number of values to draw (default 70)
# min_val        lower bound passed to runif() (default 10)
# max_val        upper bound passed to runif() (default 80)
# decimal_places digits kept by round() (default 2)
#
# Returns a numeric vector of length `n`.
x <- function(n = 70, min_val = 10, max_val = 80, decimal_places = 2) {
  round(runif(n, min = min_val, max = max_val), decimal_places)
}

# Simulate 70 values and show them as a single horizontal box plot with the
# individual points jittered on top.
valores <- x(n = 70)
data5 <- data.frame(
  name1 = rep("A", times = 70),
  value1 = valores
)
p2 <- ggplot(data5, aes(x = value1, y = " ")) +
  geom_boxplot(
    width = 0.5,
    color = "orange",
    fill = NA          # outline only, no fill
  ) +
  geom_jitter(
    color = "#034A94",
    size = 3,
    alpha = 0.5        # semi-transparent so overlapping points stay visible
  ) +
  theme_minimal()

plot(p2)

# Location summaries of the simulated sample `valores`; the `##` lines are the
# printed results.
cat("La media es = ", mean(valores), "\n")

## La media es =  43.83371

cat("La mediana es = ", median(valores), "\n")

## La mediana es =  42.01

cat("La media geométrica es = ", geometric.mean(valores), "\n")

## La media geométrica es =  38.75578

# probs = 1:3 / 4 is 0.25, 0.50 and 0.75, i.e. the three quartiles
cat("El 1er, 2do y 3er cuartil son = ", quantile(valores, probs = 1:3 / 4), "\n")

## El 1er, 2do y 3er cuartil son =  26.495 42.01 59.6825

library(psych)
# Same location summaries for a small hand-written sample; the `##` lines are
# the printed results.
z <- c(10, 20, 20, 30, 40, 50, 50, 50, 60, 76, 80)

cat("La media es = ", mean(z), "\n")

## La media es =  44.18182

cat("La mediana es = ", median(z), "\n")

## La mediana es =  50

cat("La media geométrica es = ", geometric.mean(z), "\n")

## La media geométrica es =  37.70356

# probs = 1:3 / 4 is 0.25, 0.50 and 0.75, i.e. the three quartiles
cat("El 1er, 2do y 3er cuartil son = ", quantile(z, probs = 1:3 / 4), "\n")

## El 1er, 2do y 3er cuartil son =  25 50 55

# Index plot: each value of z against its position in the vector
plot(z)

CAPACITACIÓN

Genjis Ossa

2024-09-03

Creación de datos.

Gráfico

Creo una base de datos con dos variables: grupo y valor

Creo la gráfica

Otro tipo de gráfico

Gráfico

Otro gráfico

Cargar datos

Gráfico de barras con género y categoría

Gráfico de cajas para categoría y sexo.

Gráfico de caja para ver la distribución del tiempo por género

Relación tiempo - género

Variables nominales (Encuesta)

Tabla de frecuencia de los datos

Tabla