#install.packages("UsingR")
#install.packages("MASS")
#install.packages("plotly")
#install.packages("ggplot2")
#install.packages("moments")
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Cargando paquete requerido: ggplot2
##
## Adjuntando el paquete: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(MASS)
##
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
library(ggplot2)
library(UsingR)
## Cargando paquete requerido: HistData
## Cargando paquete requerido: Hmisc
##
## Adjuntando el paquete: 'Hmisc'
## The following object is masked from 'package:plotly':
##
## subplot
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(moments)
# Wrap the `brightness` values in a one-column data frame (column `brillo`)
# so they can be handed to ggplot2, then preview the first six rows.
# NOTE(review): `brightness` is presumably the dataset supplied by the
# attached UsingR package -- confirm.
brightness <- data.frame(brillo = brightness)
head(brightness, n = 6L)
## brillo
## 1 9.10
## 2 9.27
## 3 6.61
## 4 8.06
## 5 8.55
## 6 12.31
# Histogram of `brillo` on the density scale, overlaid with a kernel density
# estimate and a dashed vertical line at the sample mean.
grafico_brightness <- ggplot(brightness, aes(x = brillo)) +
  geom_histogram(
    # after_stat(density) replaces the `..density..` dot-dot notation, which
    # is deprecated since ggplot2 3.4.0 and raised a lifecycle warning.
    aes(y = after_stat(density)),
    binwidth = 2,
    fill = "lightgreen",
    alpha = 0.6,
    color = "black"
  ) +
  geom_density(color = "#1f78b4", linewidth = 1, alpha = 0.2, fill = "red") +
  geom_vline(
    xintercept = mean(brightness$brillo),
    color = "blue",
    linetype = "dashed"
  ) +
  labs(
    title = "Histograma y Grafico de Densidad del Brillo de Estrellas",
    x = "Brillo",
    y = "Densidad"
  ) +
  theme_minimal()
# Display the static plot
grafico_brightness
# Turn the ggplot2 chart into an interactive plotly widget and display it.
p_interactivo <- plotly::ggplotly(p = grafico_brightness)
p_interactivo
# Base-graphics box plot of the brightness data frame (one box per column).
boxplot(
  brightness,
  col = "lightgreen",
  ylab = "Brillo",
  main = "Boxplot del Brillo de Estrellas"
)
# Flag outliers in `brillo` using fences at 1.5 times the interquartile range.
# The quartiles are requested explicitly (probs = 0.25 / 0.75) instead of
# being picked by position from the default output of quantile().
Q1 <- quantile(brightness$brillo, probs = 0.25, names = FALSE)
Q3 <- quantile(brightness$brillo, probs = 0.75, names = FALSE)
# Stored as `rango_iq` rather than `IQR` so that the stats::IQR() function
# is not shadowed by a numeric variable of the same name.
rango_iq <- Q3 - Q1
# Fences used to identify outliers
lower_bound <- Q1 - 1.5 * rango_iq
upper_bound <- Q3 + 1.5 * rango_iq
# Subset the numeric column directly instead of indexing the whole data
# frame with a logical matrix; the selected values and their order are the
# same.
atipicos <- brightness$brillo[
  brightness$brillo < lower_bound | brightness$brillo > upper_bound
]
print(c("Los valores atipicos son", atipicos))
## [1] "Los valores atipicos son" "12.31"
## [3] "11.71" "5.53"
## [5] "11.28" "4.78"
## [7] "5.13" "4.37"
## [9] "5.04" "12.43"
## [11] "12.04" "4.55"
## [13] "11.55" "12.14"
## [15] "11.63" "4.99"
## [17] "11.67" "4.61"
## [19] "11.99" "12.04"
## [21] "5.55" "12.17"
## [23] "11.55" "11.79"
## [25] "12.19" "2.07"
## [27] "11.65" "11.73"
## [29] "2.28" "5.42"
## [31] "3.88" "5.54"
## [33] "5.29" "5.01"
## [35] "11.55" "4.89"
## [37] "11.8" "5.41"
## [39] "5.24"
# Sort the outliers in ascending order; position 2 is then the second
# smallest outlier, which is reported below.
atipico_dos <- sort(atipicos)
print(paste("El segundo menor valor atipico es:", atipico_dos[2], sep = " "))
## [1] "El segundo menor valor atipico es: 2.28"
# Keep only the values that lie inside the fences (both bounds inclusive),
# i.e. everything that is neither below the lower nor above the upper bound.
datos_sin_atipicos <- with(
  brightness,
  brillo[!(brillo < lower_bound | brillo > upper_bound)]
)
datos_sin_atipicos
## [1] 9.10 9.27 6.61 8.06 8.55 9.64 9.05 8.59 8.59 7.34 8.43 8.80
## [13] 7.25 8.60 8.15 11.03 6.53 8.51 7.55 8.69 7.57 9.05 6.28 9.13
## [25] 9.32 8.83 9.14 8.26 7.63 9.09 8.10 6.43 9.07 7.68 10.44 8.65
## [37] 7.46 8.70 10.61 8.20 6.18 7.91 9.59 8.57 10.78 7.31 9.53 6.49
## [49] 8.94 8.56 10.96 10.57 7.40 8.12 8.27 7.05 9.09 8.34 8.86 8.27
## [61] 6.36 8.08 11.00 8.55 7.83 8.79 8.33 10.42 8.26 8.97 6.90 9.93
## [73] 7.42 9.03 8.41 8.06 8.69 8.40 8.57 9.50 8.85 9.61 10.62 8.05
## [85] 7.80 5.71 7.87 7.64 7.66 8.68 8.12 10.10 8.67 10.46 9.87 9.48
## [97] 7.04 8.44 9.88 7.05 8.29 9.34 7.73 6.22 8.53 7.23 8.61 10.76
## [109] 8.93 7.95 7.46 8.60 8.55 9.20 6.82 8.29 6.83 7.21 5.58 8.70
## [121] 8.06 10.86 6.50 9.32 9.14 8.13 10.62 6.62 9.96 8.64 6.60 6.25
## [133] 7.83 10.03 9.04 8.47 7.33 8.66 10.35 8.96 8.49 11.26 8.15 7.04
## [145] 10.02 8.90 7.78 9.93 8.60 8.51 7.09 6.93 8.68 8.98 9.84 8.98
## [157] 7.98 10.16 8.86 8.58 9.56 9.24 9.63 5.80 9.05 8.45 8.86 7.84
## [169] 8.86 8.93 7.97 6.90 8.47 6.77 8.55 8.48 8.53 6.33 8.99 8.64
## [181] 9.55 8.74 8.16 9.46 5.70 7.62 8.95 8.97 8.94 7.24 10.32 8.24
## [193] 8.62 9.18 8.53 8.54 8.56 9.41 5.87 7.20 9.05 9.52 10.24 7.70
## [205] 8.17 7.29 9.26 7.94 8.42 8.56 7.52 7.74 8.85 9.01 7.17 9.04
## [217] 10.30 9.86 7.64 8.27 8.44 9.58 8.43 8.49 9.64 9.17 8.09 9.00
## [229] 6.25 8.56 10.81 8.76 7.76 7.82 7.90 8.52 9.73 9.19 8.10 8.75
## [241] 8.14 8.65 10.30 6.46 6.73 7.96 9.53 8.87 6.59 8.65 9.64 9.15
## [253] 9.04 8.42 8.09 9.06 8.09 8.18 8.77 7.36 9.16 8.82 11.14 6.24
## [265] 9.44 7.49 6.96 7.94 8.69 8.15 8.45 7.92 7.45 9.01 8.55 9.23
## [277] 9.16 7.90 8.68 7.78 8.21 8.11 8.29 7.89 9.67 8.24 6.80 8.18
## [289] 8.44 7.45 6.31 8.15 8.27 7.66 8.59 7.09 8.54 9.58 8.44 8.59
## [301] 8.01 8.29 9.62 7.26 7.91 9.45 8.19 8.93 7.65 8.53 7.38 8.56
## [313] 8.76 9.56 7.09 9.83 5.90 10.80 8.41 9.05 8.79 8.88 7.59 9.60
## [325] 10.66 8.55 8.11 9.44 9.60 5.78 10.66 6.38 8.80 7.79 8.60 7.77
## [337] 10.37 9.80 10.42 9.22 8.43 7.33 8.93 9.09 9.26 8.73 9.18 8.12
## [349] 9.26 8.94 6.11 9.13 7.90 9.34 7.13 10.82 7.46 8.72 7.02 9.08
## [361] 8.37 5.59 7.37 5.68 8.56 8.72 9.06 8.82 8.18 9.39 9.10 8.46
## [373] 9.15 8.28 8.18 7.93 9.21 6.09 8.31 7.83 8.72 6.61 6.25 7.82
## [385] 8.66 8.15 8.97 8.15 7.47 8.63 8.13 8.23 8.41 6.47 9.83 8.64
## [397] 7.73 8.64 8.94 8.84 6.32 5.80 8.97 7.53 7.41 7.80 8.14 6.71
## [409] 8.73 9.37 8.69 9.95 7.10 8.09 6.88 9.48 9.04 9.30 8.49 8.30
## [421] 7.95 7.08 6.93 8.38 8.56 8.78 7.42 8.26 7.71 6.91 9.16 8.99
## [433] 8.63 9.90 7.59 7.39 7.78 7.47 6.97 8.82 9.13 7.86 7.13 9.45
## [445] 8.78 7.23 9.73 7.36 7.36 8.47 9.37 6.99 8.20 8.36 8.22 9.91
## [457] 9.67 8.60 10.07 10.15 7.75 9.21 9.66 8.47 9.37 9.44 9.99 10.38
## [469] 7.51 8.91 7.45 9.57 8.99 8.58 6.90 7.55 7.93 9.71 9.57 8.55
## [481] 6.62 7.89 7.51 7.36 8.66 8.51 6.65 9.67 7.80 8.21 7.90 8.94
## [493] 9.82 8.69 8.57 8.89 5.98 7.92 7.60 8.22 5.70 8.75 6.93 7.97
## [505] 8.06 10.13 7.31 8.35 5.57 9.85 9.16 9.03 10.07 9.76 9.35 10.95
## [517] 8.87 6.68 9.69 8.05 10.30 6.07 8.51 7.71 8.56 8.26 8.62 10.92
## [529] 10.51 9.83 9.84 9.74 8.21 8.72 8.03 9.00 6.19 8.22 7.93 10.18
## [541] 8.98 9.13 6.91 8.79 8.23 10.24 8.83 7.62 8.96 10.41 8.97 9.61
## [553] 8.29 8.30 8.26 7.44 9.52 8.20 8.68 8.65 10.52 8.41 9.18 8.42
## [565] 8.86 7.92 10.97 8.85 9.31 10.28 7.56 7.88 7.99 8.23 8.52 9.14
## [577] 6.20 7.64 8.95 7.48 7.06 7.33 8.98 8.24 8.53 8.40 7.48 8.46
## [589] 9.29 8.57 8.70 8.50 8.37 6.87 7.50 7.39 8.19 7.56 8.37 7.39
## [601] 6.73 8.66 8.25 8.47 8.01 6.83 9.06 8.79 7.44 6.43 5.93 8.85
## [613] 9.86 8.55 7.66 7.82 9.08 10.10 8.21 8.85 7.79 7.58 7.85 7.18
## [625] 7.54 9.72 7.12 9.77 8.84 5.67 8.15 9.61 8.19 7.27 8.51 8.36
## [637] 10.00 8.74 6.18 10.26 10.16 8.31 8.58 7.04 8.81 5.99 8.22 9.86
## [649] 8.00 9.40 9.10 8.11 8.89 9.43 7.59 8.72 9.86 9.23 9.50 10.73
## [661] 7.59 7.41 9.26 7.78 7.76 8.94 8.95 6.41 6.11 7.76 7.38 6.21
## [673] 7.05 7.44 8.50 7.84 11.01 7.88 9.10 8.65 8.41 7.81 7.43 8.76
## [685] 7.58 9.55 6.82 10.24 6.24 7.31 10.52 9.27 7.13 9.14 8.48 8.57
## [697] 7.21 9.05 7.72 8.03 6.47 5.57 6.32 7.78 8.58 10.37 9.23 9.20
## [709] 6.93 9.32 7.11 9.79 8.21 8.42 7.05 9.26 8.77 9.25 9.30 10.63
## [721] 9.90 9.89 9.33 7.78 7.02 11.26 8.89 9.60 7.07 6.01 9.11 8.24
## [733] 8.97 8.59 7.17 7.94 7.27 9.59 7.94 8.52 7.59 9.17 8.08 9.80
## [745] 8.92 9.91 9.42 8.84 10.15 8.37 9.33 9.35 7.40 8.35 9.53 9.59
## [757] 10.05 8.57 8.48 8.43 8.45 8.84 11.18 8.64 8.42 6.34 7.93 8.36
## [769] 8.32 7.77 6.84 8.78 7.19 8.50 8.82 9.04 7.93 7.66 10.07 9.03
## [781] 8.13 7.51 9.08 7.10 7.88 9.40 9.06 8.38 10.65 7.77 8.50 8.61
## [793] 10.05 8.71 9.37 6.97 8.56 9.34 9.47 8.11 8.91 7.83 8.95 7.20
## [805] 9.37 5.84 9.81 9.27 9.50 9.32 8.92 8.38 7.74 8.60 9.49 8.35
## [817] 7.11 9.87 8.98 7.75 8.24 6.74 6.83 7.70 6.70 8.67 9.94 8.73
## [829] 9.63 6.66 8.29 8.47 8.16 8.97 7.51 8.97 8.55 5.84 7.85 8.68
## [841] 8.05 8.27 7.68 9.40 7.77 6.89 7.55 8.27 8.16 8.07 7.91 7.71
## [853] 10.16 8.41 8.88 9.64 7.93 7.78 8.90 8.55 9.15 10.86 9.08 7.44
## [865] 10.35 6.68 8.85 8.90 8.24 6.74 10.75 8.44 7.69 8.88 7.70 8.60
## [877] 8.44 9.50 9.03 7.15 7.95 8.23 9.81 8.48 9.33 8.97 8.08 7.47
## [889] 8.34 7.75 8.34 7.56 6.93 10.03 8.69 9.04 8.32 7.85 7.21 8.98
## [901] 7.09 8.85 9.21 8.61 7.91 7.47 8.65 8.53 9.92 8.09 7.06 8.45
## [913] 8.73 7.45 9.02 7.51 7.32 8.17 9.45 9.72 9.34 8.75 9.32 7.91
## [925] 7.49 6.53 6.18 8.69
library(ggplot2)
library(moments)
# Descriptive statistics for the brightness values.
media <- mean(brightness$brillo)
mediana <- median(brightness$brillo)
# Mode: the most frequent value. table() keeps the values as character
# names, so convert back to numeric to keep `moda` the same type as the
# other measures.
moda <- as.numeric(names(sort(table(brightness$brillo), decreasing = TRUE))[1])
desv_std <- sd(brightness$brillo)
rango <- range(brightness$brillo)
# Namespace-qualified so the call is unambiguous even though the script
# earlier assigns a numeric value to a variable named `IQR`.
iqr <- stats::IQR(brightness$brillo)
asimetria <- skewness(brightness$brillo)
curtosis <- kurtosis(brightness$brillo)
# Print the results
cat("Media:", media, "\n")
## Media: 8.417743
cat("Mediana:", mediana, "\n")
## Mediana: 8.5
cat("Moda:", moda, "\n")
## Moda: 8.55
cat("Desviación estándar:", desv_std, "\n")
## Desviación estándar: 1.294231
cat("Rango:", rango, "\n")
## Rango: 2.07 12.43
cat("IQR:", iqr, "\n")
## IQR: 1.4275
cat("Asimetría:", asimetria, "\n")
## Asimetría: -0.2601868
cat("Curtosis:", curtosis, "\n")
## Curtosis: 4.550829
Media cercana a la mediana: El hecho de que la media (8.42) esté muy cerca de la mediana (8.50) sugiere que la distribución es aproximadamente simétrica. Es decir, los datos no están fuertemente sesgados hacia valores altos o bajos.
Moda: La moda (8.55) es prácticamente igual a la mediana (8.50) y muy cercana a la media (8.42). Esto indica que los datos se concentran en torno a un único valor central, por lo que no hay indicios de que la distribución sea bimodal.
Desviación estándar: Un valor de 1.29 indica una dispersión moderada alrededor de la media (8.42). Los datos están relativamente dispersos, pero no extremadamente.
Rango e IQR: El rango (2.07 - 12.43) y el IQR (1.43) nos dan una idea de la extensión de los datos. El rango indica el alcance total de los valores, mientras que el IQR nos dice la amplitud de los valores centrales (entre el primer y tercer cuartil).
Asimetría: Un valor de asimetría de -0.26, negativo y relativamente cercano a cero, es coherente con la observación inicial de que la distribución es aproximadamente simétrica. El signo negativo indica una leve asimetría negativa (cola ligeramente más larga hacia valores bajos).
Curtosis: Un valor de curtosis de 4.55 indica que la distribución es más apuntada (leptocúrtica) que una distribución normal (que tiene una curtosis de 3). Esto significa que hay una concentración relativamente mayor de datos cerca de la media y en las colas, en comparación con una distribución normal.
# Make a data-frame copy of the cereal data under the same name and preview
# its first six rows.
UScereal <- data.frame(UScereal)
head(UScereal, n = 6L)
## mfr calories protein fat sodium fibre
## 100% Bran N 212.1212 12.121212 3.030303 393.9394 30.303030
## All-Bran K 212.1212 12.121212 3.030303 787.8788 27.272727
## All-Bran with Extra Fiber K 100.0000 8.000000 0.000000 280.0000 28.000000
## Apple Cinnamon Cheerios G 146.6667 2.666667 2.666667 240.0000 2.000000
## Apple Jacks K 110.0000 2.000000 0.000000 125.0000 1.000000
## Basic 4 G 173.3333 4.000000 2.666667 280.0000 2.666667
## carbo sugars shelf potassium vitamins
## 100% Bran 15.15152 18.18182 3 848.48485 enriched
## All-Bran 21.21212 15.15151 3 969.69697 enriched
## All-Bran with Extra Fiber 16.00000 0.00000 3 660.00000 enriched
## Apple Cinnamon Cheerios 14.00000 13.33333 1 93.33333 enriched
## Apple Jacks 11.00000 14.00000 2 30.00000 enriched
## Basic 4 24.00000 10.66667 3 133.33333 enriched
# List the column names of the cereal data frame.
names(UScereal)
## [1] "mfr" "calories" "protein" "fat" "sodium" "fibre"
## [7] "carbo" "sugars" "shelf" "potassium" "vitamins"
# Keep manufacturer and shelf only, convert shelf to a factor, show its
# levels and preview the result.
UScereal1 <- UScereal %>% dplyr::select(mfr, shelf)
UScereal1[["shelf"]] <- factor(UScereal1[["shelf"]])
valores_shelf <- levels(UScereal1[["shelf"]])
print(valores_shelf)
## [1] "1" "2" "3"
head(UScereal1, n = 6L)
## mfr shelf
## 100% Bran N 3
## All-Bran K 3
## All-Bran with Extra Fiber K 3
## Apple Cinnamon Cheerios G 1
## Apple Jacks K 2
## Basic 4 G 3
# Grouped bar chart: number of cereals per manufacturer, split by shelf,
# with the count printed above each bar.
ggplot(UScereal1, aes(x = mfr, fill = factor(shelf))) +
  geom_bar(position = "dodge") + # "dodge" places the bars side by side
  geom_text(
    stat = "count",
    # after_stat(count) replaces the `..count..` dot-dot notation, which is
    # deprecated since ggplot2 3.4.0.
    aes(label = after_stat(count)),
    position = position_dodge(width = 0.9),
    vjust = -0.5
  ) +
  labs(
    title = "Fabricantes por Estantes",
    x = "Fabricante",
    y = "Cantidad",
    fill = "Estantes"
  ) +
  theme_minimal()
# Subset of the cereal data with the fat and vitamins columns, printed in
# full.
UScereal2 <- UScereal %>% dplyr::select(fat, vitamins)
print(UScereal2)
## fat vitamins
## 100% Bran 3.0303030 enriched
## All-Bran 3.0303030 enriched
## All-Bran with Extra Fiber 0.0000000 enriched
## Apple Cinnamon Cheerios 2.6666667 enriched
## Apple Jacks 0.0000000 enriched
## Basic 4 2.6666667 enriched
## Bran Chex 1.4925373 enriched
## Bran Flakes 0.0000000 enriched
## Cap'n'Crunch 2.6666667 enriched
## Cheerios 1.6000000 enriched
## Cinnamon Toast Crunch 4.0000000 enriched
## Clusters 4.0000000 enriched
## Cocoa Puffs 1.0000000 enriched
## Corn Chex 0.0000000 enriched
## Corn Flakes 0.0000000 enriched
## Corn Pops 0.0000000 enriched
## Count Chocula 1.0000000 enriched
## Cracklin' Oat Bran 6.0000000 enriched
## Crispix 0.0000000 enriched
## Crispy Wheat & Raisins 1.3333333 enriched
## Double Chex 0.0000000 enriched
## Froot Loops 1.0000000 enriched
## Frosted Flakes 0.0000000 enriched
## Frosted Mini-Wheats 0.0000000 enriched
## Fruit & Fibre: Dates Walnuts and Oats 2.9850746 enriched
## Fruitful Bran 0.0000000 enriched
## Fruity Pebbles 1.3333333 enriched
## Golden Crisp 0.0000000 enriched
## Golden Grahams 1.3333333 enriched
## Grape Nuts Flakes 1.1363636 enriched
## Grape-Nuts 0.0000000 enriched
## Great Grains Pecan 9.0909091 enriched
## Honey Graham Ohs 2.0000000 enriched
## Honey Nut Cheerios 1.3333333 enriched
## Honey-comb 0.0000000 enriched
## Just Right Fruit & Nut 1.3333333 100%
## Kix 0.6666667 enriched
## Life 2.9850746 enriched
## Lucky Charms 1.0000000 enriched
## Mueslix Crispy Blend 2.9850746 enriched
## Multi-Grain Cheerios 1.0000000 enriched
## Nut&Honey Crunch 1.4925373 enriched
## Nutri-Grain Almond-Raisin 2.9850746 enriched
## Oatmeal Raisin Crisp 4.0000000 enriched
## Post Nat. Raisin Bran 1.4925373 enriched
## Product 19 0.0000000 100%
## Puffed Rice 0.0000000 none
## Quaker Oat Squares 2.0000000 enriched
## Raisin Bran 1.3333333 enriched
## Raisin Nut Bran 4.0000000 enriched
## Raisin Squares 0.0000000 enriched
## Rice Chex 0.0000000 enriched
## Rice Krispies 0.0000000 enriched
## Shredded Wheat 'n'Bran 0.0000000 none
## Shredded Wheat spoon size 0.0000000 none
## Smacks 1.3333333 enriched
## Special K 0.0000000 enriched
## Total Corn Flakes 1.0000000 100%
## Total Raisin Bran 1.0000000 100%
## Total Whole Grain 1.0000000 100%
## Triples 1.3333333 enriched
## Trix 1.0000000 enriched
## Wheat Chex 1.4925373 enriched
## Wheaties 1.0000000 enriched
## Wheaties Honey Gold 1.3333333 enriched
# Inspect the structure of the fat/vitamins subset.
str(UScereal2)
## 'data.frame': 65 obs. of 2 variables:
## $ fat : num 3.03 3.03 0 2.67 0 ...
## $ vitamins: Factor w/ 3 levels "100%","enriched",..: 2 2 2 2 2 2 2 2 2 2 ...

# Box plot of fat for each vitamins level.
ggplot(data = UScereal2, mapping = aes(vitamins, fat)) +
  geom_boxplot(fill = "#69b3a2") +
  theme_minimal() +
  labs(
    x = "Nivel de Vitaminas",
    y = "Grasa (g)",
    title = "Distribucion de Grasa por Nivel de Vitaminas"
  )

# Violin plot of the same distribution.
ggplot(data = UScereal2, mapping = aes(vitamins, fat)) +
  geom_violin(color = "black", fill = "#69b3a2") +
  theme_minimal() +
  labs(
    x = "Nivel de Vitaminas",
    y = "Grasa (g)",
    title = "Distribucion de Grasa por Nivel de Vitaminas"
  )

# Bar chart of the mean fat per vitamins level; the legend is hidden because
# the fill colour repeats the x axis.
ggplot(data = UScereal2, mapping = aes(vitamins, fat, fill = vitamins)) +
  stat_summary(geom = "bar", fun = "mean") +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(
    x = "Nivel de Vitaminas",
    y = "Media de Grasa (g)",
    title = "Media de Grasa por Nivel de Vitaminas"
  )

# Histogram of fat, one panel per vitamins level with independent axes.
ggplot(data = UScereal2, mapping = aes(fat, fill = vitamins)) +
  geom_histogram(alpha = 0.7, color = "black", binwidth = 1) +
  facet_wrap(vars(vitamins), scales = "free") +
  theme_minimal() +
  labs(
    x = "Grasa (g)",
    y = "Frecuencia",
    title = "Histograma de Grasa por Nivel de Vitaminas"
  )
# Subset of the cereal data with the fat and shelf columns, printed in full.
UScereal3 <- UScereal %>% dplyr::select(fat, shelf)
print(UScereal3)
## fat shelf
## 100% Bran 3.0303030 3
## All-Bran 3.0303030 3
## All-Bran with Extra Fiber 0.0000000 3
## Apple Cinnamon Cheerios 2.6666667 1
## Apple Jacks 0.0000000 2
## Basic 4 2.6666667 3
## Bran Chex 1.4925373 1
## Bran Flakes 0.0000000 3
## Cap'n'Crunch 2.6666667 2
## Cheerios 1.6000000 1
## Cinnamon Toast Crunch 4.0000000 2
## Clusters 4.0000000 3
## Cocoa Puffs 1.0000000 2
## Corn Chex 0.0000000 1
## Corn Flakes 0.0000000 1
## Corn Pops 0.0000000 2
## Count Chocula 1.0000000 2
## Cracklin' Oat Bran 6.0000000 3
## Crispix 0.0000000 3
## Crispy Wheat & Raisins 1.3333333 3
## Double Chex 0.0000000 3
## Froot Loops 1.0000000 2
## Frosted Flakes 0.0000000 1
## Frosted Mini-Wheats 0.0000000 2
## Fruit & Fibre: Dates Walnuts and Oats 2.9850746 3
## Fruitful Bran 0.0000000 3
## Fruity Pebbles 1.3333333 2
## Golden Crisp 0.0000000 1
## Golden Grahams 1.3333333 2
## Grape Nuts Flakes 1.1363636 3
## Grape-Nuts 0.0000000 3
## Great Grains Pecan 9.0909091 3
## Honey Graham Ohs 2.0000000 2
## Honey Nut Cheerios 1.3333333 1
## Honey-comb 0.0000000 1
## Just Right Fruit & Nut 1.3333333 3
## Kix 0.6666667 2
## Life 2.9850746 2
## Lucky Charms 1.0000000 2
## Mueslix Crispy Blend 2.9850746 3
## Multi-Grain Cheerios 1.0000000 1
## Nut&Honey Crunch 1.4925373 2
## Nutri-Grain Almond-Raisin 2.9850746 3
## Oatmeal Raisin Crisp 4.0000000 3
## Post Nat. Raisin Bran 1.4925373 3
## Product 19 0.0000000 3
## Puffed Rice 0.0000000 3
## Quaker Oat Squares 2.0000000 3
## Raisin Bran 1.3333333 2
## Raisin Nut Bran 4.0000000 3
## Raisin Squares 0.0000000 3
## Rice Chex 0.0000000 1
## Rice Krispies 0.0000000 1
## Shredded Wheat 'n'Bran 0.0000000 1
## Shredded Wheat spoon size 0.0000000 1
## Smacks 1.3333333 2
## Special K 0.0000000 1
## Total Corn Flakes 1.0000000 3
## Total Raisin Bran 1.0000000 3
## Total Whole Grain 1.0000000 3
## Triples 1.3333333 3
## Trix 1.0000000 2
## Wheat Chex 1.4925373 1
## Wheaties 1.0000000 1
## Wheaties Honey Gold 1.3333333 1
# Inspect the structure of the fat/shelf subset (shelf is stored as integer).
str(UScereal3)
## 'data.frame': 65 obs. of 2 variables:
## $ fat : num 3.03 3.03 0 2.67 0 ...
## $ shelf: int 3 3 3 1 2 3 1 3 2 1 ...

# Box plot of fat per shelf; shelf is converted to a factor so that each
# shelf gets its own box.
ggplot(UScereal3, aes(x = factor(shelf), y = fat)) +
  geom_boxplot(fill = "#69b3a2") +
  labs(
    title = "Distribucion de Grasa por Estante",
    x = "Estante",
    y = "Grasa (g)"
  ) +
  theme_minimal()

# Bar chart of the mean fat per shelf; the legend is hidden because the fill
# colour repeats the x axis.
ggplot(UScereal3, aes(x = factor(shelf), y = fat, fill = factor(shelf))) +
  stat_summary(fun = "mean", geom = "bar") +
  labs(
    title = "Media de Grasa por Estante",
    x = "Estante",
    y = "Media de Grasa (g)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# Scatter of fat per shelf. shelf is mapped as a factor here as well, for
# consistency with the other plots: as a raw integer it produced a
# continuous axis for what is a discrete shelf number.
ggplot(UScereal3, aes(x = factor(shelf), y = fat)) +
  geom_point(color = "#69b3a2") +
  labs(
    title = "Grasa por Estante",
    x = "Estante",
    y = "Grasa (g)"
  ) +
  theme_minimal()

# Histogram of fat, one panel per shelf with independent axes.
ggplot(UScereal3, aes(x = fat, fill = factor(shelf))) +
  geom_histogram(binwidth = 1, color = "black", alpha = 0.7) +
  facet_wrap(~ shelf, scales = "free") +
  labs(
    title = "Histograma de Grasa por Estante",
    x = "Grasa (g)",
    y = "Frecuencia"
  ) +
  theme_minimal()
# Preview the first six rows of the full cereal data again.
head(UScereal, n = 6L)
## mfr calories protein fat sodium fibre
## 100% Bran N 212.1212 12.121212 3.030303 393.9394 30.303030
## All-Bran K 212.1212 12.121212 3.030303 787.8788 27.272727
## All-Bran with Extra Fiber K 100.0000 8.000000 0.000000 280.0000 28.000000
## Apple Cinnamon Cheerios G 146.6667 2.666667 2.666667 240.0000 2.000000
## Apple Jacks K 110.0000 2.000000 0.000000 125.0000 1.000000
## Basic 4 G 173.3333 4.000000 2.666667 280.0000 2.666667
## carbo sugars shelf potassium vitamins
## 100% Bran 15.15152 18.18182 3 848.48485 enriched
## All-Bran 21.21212 15.15151 3 969.69697 enriched
## All-Bran with Extra Fiber 16.00000 0.00000 3 660.00000 enriched
## Apple Cinnamon Cheerios 14.00000 13.33333 1 93.33333 enriched
## Apple Jacks 11.00000 14.00000 2 30.00000 enriched
## Basic 4 24.00000 10.66667 3 133.33333 enriched
# Subset of the cereal data with the carbo and sugars columns, printed in
# full.
UScereal4 <- UScereal %>% dplyr::select(carbo, sugars)
print(UScereal4)
## carbo sugars
## 100% Bran 15.15152 18.181818
## All-Bran 21.21212 15.151515
## All-Bran with Extra Fiber 16.00000 0.000000
## Apple Cinnamon Cheerios 14.00000 13.333333
## Apple Jacks 11.00000 14.000000
## Basic 4 24.00000 10.666667
## Bran Chex 22.38806 8.955224
## Bran Flakes 19.40299 7.462687
## Cap'n'Crunch 16.00000 16.000000
## Cheerios 13.60000 0.800000
## Cinnamon Toast Crunch 17.33333 12.000000
## Clusters 26.00000 14.000000
## Cocoa Puffs 12.00000 13.000000
## Corn Chex 22.00000 3.000000
## Corn Flakes 21.00000 2.000000
## Corn Pops 13.00000 12.000000
## Count Chocula 12.00000 13.000000
## Cracklin' Oat Bran 20.00000 14.000000
## Crispix 21.00000 3.000000
## Crispy Wheat & Raisins 14.66667 13.333333
## Double Chex 24.00000 6.666667
## Froot Loops 11.00000 13.000000
## Frosted Flakes 18.66667 14.666667
## Frosted Mini-Wheats 17.50000 8.750000
## Fruit & Fibre: Dates Walnuts and Oats 17.91045 14.925373
## Fruitful Bran 20.89552 17.910448
## Fruity Pebbles 17.33333 16.000000
## Golden Crisp 12.50000 17.045455
## Golden Grahams 20.00000 12.000000
## Grape Nuts Flakes 17.04545 5.681818
## Grape-Nuts 68.00000 12.000000
## Great Grains Pecan 39.39394 12.121212
## Honey Graham Ohs 12.00000 11.000000
## Honey Nut Cheerios 15.33333 13.333333
## Honey-comb 10.52632 8.270677
## Just Right Fruit & Nut 26.66667 12.000000
## Kix 14.00000 2.000000
## Life 17.91045 8.955224
## Lucky Charms 12.00000 12.000000
## Mueslix Crispy Blend 25.37313 19.402985
## Multi-Grain Cheerios 15.00000 6.000000
## Nut&Honey Crunch 22.38806 13.432836
## Nutri-Grain Almond-Raisin 31.34328 10.447761
## Oatmeal Raisin Crisp 27.00000 20.000000
## Post Nat. Raisin Bran 16.41791 20.895522
## Product 19 20.00000 3.000000
## Puffed Rice 13.00000 0.000000
## Quaker Oat Squares 28.00000 12.000000
## Raisin Bran 18.66667 16.000000
## Raisin Nut Bran 21.00000 16.000000
## Raisin Squares 30.00000 12.000000
## Rice Chex 20.35398 1.769912
## Rice Krispies 22.00000 3.000000
## Shredded Wheat 'n'Bran 28.35821 0.000000
## Shredded Wheat spoon size 29.85075 0.000000
## Smacks 12.00000 20.000000
## Special K 16.00000 3.000000
## Total Corn Flakes 21.00000 3.000000
## Total Raisin Bran 15.00000 14.000000
## Total Whole Grain 16.00000 3.000000
## Triples 28.00000 4.000000
## Trix 13.00000 12.000000
## Wheat Chex 25.37313 4.477612
## Wheaties 17.00000 3.000000
## Wheaties Honey Gold 21.33333 10.666667
# Inspect the structure of the carbo/sugars subset.
str(UScereal4)
## 'data.frame': 65 obs. of 2 variables:
## $ carbo : num 15.2 21.2 16 14 11 ...
## $ sugars: num 18.2 15.2 0 13.3 14 ...

# Scatter plot of sugars against carbohydrates.
ggplot(data = UScereal4, mapping = aes(carbo, sugars)) +
  geom_point(color = "#69b3a2") +
  theme_minimal() +
  labs(
    x = "Carbohidratos (g)",
    y = "Azucares (g)",
    title = "Relacion entre Carbohidratos y Azucares"
  )

# Contour lines of the estimated two-dimensional density.
ggplot(data = UScereal4, mapping = aes(carbo, sugars)) +
  geom_density_2d() +
  theme_minimal() +
  labs(
    x = "Carbohidratos (g)",
    y = "Azucares (g)",
    title = "Densidad Conjunta de Carbohidratos y Azucares"
  )

# Same scatter, one panel per carbohydrate interval (cut() with breaks = 4
# splits the range of carbo into four intervals).
ggplot(data = UScereal4, mapping = aes(carbo, sugars)) +
  geom_point(color = "#69b3a2") +
  facet_wrap(~ cut(carbo, breaks = 4)) +
  theme_minimal() +
  labs(
    x = "Carbohidratos (g)",
    y = "Azucares (g)",
    title = "Relacion entre Carbohidratos y Azucares por Rangos de Carbohidratos"
  )

# Box plot of sugars within each of those carbohydrate intervals.
ggplot(data = UScereal4, mapping = aes(cut(carbo, breaks = 4), sugars)) +
  geom_boxplot(fill = "#69b3a2") +
  theme_minimal() +
  labs(
    x = "Carbohidratos (g)",
    y = "Azucares (g)",
    title = "Distribucion de Azucares por Rangos de Carbohidratos"
  )
# Subset of the cereal data with the fibre and manufacturer columns, printed
# in full.
UScereal5 <- UScereal %>% dplyr::select(fibre, mfr)
print(UScereal5)
## fibre mfr
## 100% Bran 30.303030 N
## All-Bran 27.272727 K
## All-Bran with Extra Fiber 28.000000 K
## Apple Cinnamon Cheerios 2.000000 G
## Apple Jacks 1.000000 K
## Basic 4 2.666667 G
## Bran Chex 5.970149 R
## Bran Flakes 7.462687 P
## Cap'n'Crunch 0.000000 Q
## Cheerios 1.600000 G
## Cinnamon Toast Crunch 0.000000 G
## Clusters 4.000000 G
## Cocoa Puffs 0.000000 G
## Corn Chex 0.000000 R
## Corn Flakes 1.000000 K
## Corn Pops 1.000000 K
## Count Chocula 0.000000 G
## Cracklin' Oat Bran 8.000000 K
## Crispix 1.000000 K
## Crispy Wheat & Raisins 2.666667 G
## Double Chex 1.333333 R
## Froot Loops 1.000000 K
## Frosted Flakes 1.333333 K
## Frosted Mini-Wheats 3.750000 K
## Fruit & Fibre: Dates Walnuts and Oats 7.462687 P
## Fruitful Bran 7.462687 K
## Fruity Pebbles 0.000000 P
## Golden Crisp 0.000000 P
## Golden Grahams 0.000000 G
## Grape Nuts Flakes 3.409091 P
## Grape-Nuts 12.000000 P
## Great Grains Pecan 9.090909 P
## Honey Graham Ohs 1.000000 Q
## Honey Nut Cheerios 2.000000 G
## Honey-comb 0.000000 P
## Just Right Fruit & Nut 2.666667 K
## Kix 0.000000 G
## Life 2.985075 Q
## Lucky Charms 0.000000 G
## Mueslix Crispy Blend 4.477612 K
## Multi-Grain Cheerios 2.000000 G
## Nut&Honey Crunch 0.000000 K
## Nutri-Grain Almond-Raisin 4.477612 K
## Oatmeal Raisin Crisp 3.000000 G
## Post Nat. Raisin Bran 8.955224 P
## Product 19 1.000000 K
## Puffed Rice 0.000000 Q
## Quaker Oat Squares 4.000000 Q
## Raisin Bran 6.666667 K
## Raisin Nut Bran 5.000000 G
## Raisin Squares 4.000000 K
## Rice Chex 0.000000 R
## Rice Krispies 0.000000 K
## Shredded Wheat 'n'Bran 5.970149 N
## Shredded Wheat spoon size 4.477612 N
## Smacks 1.333333 K
## Special K 1.000000 K
## Total Corn Flakes 0.000000 G
## Total Raisin Bran 4.000000 G
## Total Whole Grain 3.000000 G
## Triples 0.000000 G
## Trix 0.000000 G
## Wheat Chex 4.477612 R
## Wheaties 3.000000 G
## Wheaties Honey Gold 1.333333 G
# Inspect the structure of the fibre/manufacturer subset.
str(UScereal5)
## 'data.frame': 65 obs. of 2 variables:
## $ fibre: num 30.3 27.3 28 2 1 ...
## $ mfr : Factor w/ 6 levels "G","K","N","P",..: 3 2 2 1 2 1 6 4 5 1 ...

# Box plot of fibre per manufacturer.
ggplot(UScereal5, aes(x = mfr, y = fibre)) +
  geom_boxplot(fill = "#69b3a2") +
  labs(
    title = "Distribucion de Fibra por Fabricante",
    x = "Fabricante",
    y = "Fibra (g)"
  ) +
  theme_minimal()

# Mean fibre per manufacturer. The dplyr verbs are namespace-qualified, as
# the script already does for select(), because several attached packages
# mask dplyr names.
avg_fibre <- UScereal5 %>%
  dplyr::group_by(mfr) %>%
  dplyr::summarise(mean_fibre = mean(fibre))

# Bar chart of those means with the value printed above each bar.
# geom_col() is the idiomatic form of geom_bar(stat = "identity").
ggplot(avg_fibre, aes(x = mfr, y = mean_fibre)) +
  geom_col(fill = "#4682B4") +
  geom_text(
    aes(label = format(mean_fibre, digits = 2)),
    vjust = -0.5,
    size = 3
  ) +
  labs(
    title = "Fibra Promedio por Fabricante",
    x = "Fabricante",
    y = "Fibra Promedio (g)"
  ) +
  theme_minimal()

# (The interactive help(format) call that used to sit here was removed: it
# only opened the help browser and has no place in a non-interactive run.)

# Jittered scatter of fibre per manufacturer; the horizontal jitter is
# random, so point positions differ between runs.
ggplot(UScereal5, aes(x = mfr, y = fibre)) +
  geom_jitter(width = 0.2, color = "#69b3a2") +
  labs(
    title = "Dispersion de la Fibra por Fabricante",
    x = "Fabricante",
    y = "Fibra (g)"
  ) +
  theme_minimal()
# Subset of the cereal data with the sodium and sugars columns, printed in
# full.
UScereal6 <- UScereal %>% dplyr::select(sodium, sugars)
print(UScereal6)
## sodium sugars
## 100% Bran 393.93939 18.181818
## All-Bran 787.87879 15.151515
## All-Bran with Extra Fiber 280.00000 0.000000
## Apple Cinnamon Cheerios 240.00000 13.333333
## Apple Jacks 125.00000 14.000000
## Basic 4 280.00000 10.666667
## Bran Chex 298.50746 8.955224
## Bran Flakes 313.43284 7.462687
## Cap'n'Crunch 293.33333 16.000000
## Cheerios 232.00000 0.800000
## Cinnamon Toast Crunch 280.00000 12.000000
## Clusters 280.00000 14.000000
## Cocoa Puffs 180.00000 13.000000
## Corn Chex 280.00000 3.000000
## Corn Flakes 290.00000 2.000000
## Corn Pops 90.00000 12.000000
## Count Chocula 180.00000 13.000000
## Cracklin' Oat Bran 280.00000 14.000000
## Crispix 220.00000 3.000000
## Crispy Wheat & Raisins 186.66667 13.333333
## Double Chex 253.33333 6.666667
## Froot Loops 125.00000 13.000000
## Frosted Flakes 266.66667 14.666667
## Frosted Mini-Wheats 0.00000 8.750000
## Fruit & Fibre: Dates Walnuts and Oats 238.80597 14.925373
## Fruitful Bran 358.20896 17.910448
## Fruity Pebbles 180.00000 16.000000
## Golden Crisp 51.13636 17.045455
## Golden Grahams 373.33333 12.000000
## Grape Nuts Flakes 159.09091 5.681818
## Grape-Nuts 680.00000 12.000000
## Great Grains Pecan 227.27273 12.121212
## Honey Graham Ohs 220.00000 11.000000
## Honey Nut Cheerios 333.33333 13.333333
## Honey-comb 135.33835 8.270677
## Just Right Fruit & Nut 226.66667 12.000000
## Kix 173.33333 2.000000
## Life 223.88060 8.955224
## Lucky Charms 180.00000 12.000000
## Mueslix Crispy Blend 223.88060 19.402985
## Multi-Grain Cheerios 220.00000 6.000000
## Nut&Honey Crunch 283.58209 13.432836
## Nutri-Grain Almond-Raisin 328.35821 10.447761
## Oatmeal Raisin Crisp 340.00000 20.000000
## Post Nat. Raisin Bran 298.50746 20.895522
## Product 19 320.00000 3.000000
## Puffed Rice 0.00000 0.000000
## Quaker Oat Squares 270.00000 12.000000
## Raisin Bran 280.00000 16.000000
## Raisin Nut Bran 280.00000 16.000000
## Raisin Squares 0.00000 12.000000
## Rice Chex 212.38938 1.769912
## Rice Krispies 290.00000 3.000000
## Shredded Wheat 'n'Bran 0.00000 0.000000
## Shredded Wheat spoon size 0.00000 0.000000
## Smacks 93.33333 20.000000
## Special K 230.00000 3.000000
## Total Corn Flakes 200.00000 3.000000
## Total Raisin Bran 190.00000 14.000000
## Total Whole Grain 200.00000 3.000000
## Triples 333.33333 4.000000
## Trix 140.00000 12.000000
## Wheat Chex 343.28358 4.477612
## Wheaties 200.00000 3.000000
## Wheaties Honey Gold 266.66667 10.666667
# Inspect the structure of the sodium/sugars subset.
str(UScereal6)
## 'data.frame': 65 obs. of 2 variables:
## $ sodium: num 394 788 280 240 125 ...
## $ sugars: num 18.2 15.2 0 13.3 14 ...

# Scatter plot of sugars against sodium.
ggplot(data = UScereal6, mapping = aes(sodium, sugars)) +
  geom_point(size = 3, color = "#69b3a2") +
  theme_minimal() +
  labs(
    x = "Sodio (mg)",
    y = "Azucares (g)",
    title = "Relacion entre Sodio y Azucares en Cereales"
  )

# Histogram of sodium with bins 50 units wide.
ggplot(data = UScereal6, mapping = aes(sodium)) +
  geom_histogram(color = "black", fill = "#69b3a2", binwidth = 50) +
  theme_minimal() +
  labs(
    x = "Sodio (mg)",
    y = "Frecuencia",
    title = "Distribucion de Sodio en Cereales"
  )

# Histogram of sugars with bins 2 units wide.
ggplot(data = UScereal6, mapping = aes(sugars)) +
  geom_histogram(color = "black", fill = "#1f78b4", binwidth = 2) +
  theme_minimal() +
  labs(
    x = "Azucares (g)",
    y = "Frecuencia",
    title = "Distribucion de Azucares en Cereales"
  )
# Make a data-frame copy of the mammals data under the same name and preview
# its first six rows.
mammals <- data.frame(mammals)
head(mammals, n = 6L)
## body brain
## Arctic fox 3.385 44.5
## Owl monkey 0.480 15.5
## Mountain beaver 1.350 8.1
## Cow 465.000 423.0
## Grey wolf 36.330 119.5
## Goat 27.660 115.0
# Scatter plot of brain weight against body weight to visualise their
# relationship.
ggplot(data = mammals, mapping = aes(body, brain)) +
  geom_point(size = 3, color = "#69b3a2") +
  theme_minimal() +
  labs(
    x = "Peso Corporal (kg)",
    y = "Peso Cerebral (g)",
    title = "Relacion entre Peso Corporal y Peso Cerebral en Mamiferos"
  )
# Correlation between the two raw variables (cor() with its default method),
# printed with a label.
correlation <- with(mammals, cor(body, brain))
print(paste("Correlación lineal: ", correlation))
## [1] "Correlación lineal: 0.934163842323355"
# Add natural-log versions of both weights as new columns.
mammals[["log_body"]] <- log(mammals[["body"]])
mammals[["log_brain"]] <- log(mammals[["brain"]])
# Scatter plot of the log-transformed weights.
ggplot(data = mammals, mapping = aes(log_body, log_brain)) +
  geom_point(size = 3, color = "#1f78b4") +
  theme_minimal() +
  labs(
    x = "Log(Peso Corporal) (kg)",
    y = "Log(Peso Cerebral) (g)",
    title = "Relacion Logaritmica entre Peso Corporal y Peso Cerebral"
  )
# Correlation between the log-transformed variables, printed with a label.
log_correlation <- cor(x = mammals$log_body, y = mammals$log_brain)
print(paste("Correlacion lineal despues de la transformacion logaritmica: ", log_correlation))
## [1] "Correlacion lineal despues de la transformacion logaritmica: 0.95957475837098"
# Make a data-frame copy of the anorexia data under the same name and
# preview its first six rows.
anorexia <- data.frame(anorexia)
head(anorexia, n = 6L)
## Treat Prewt Postwt
## 1 Cont 80.7 80.2
## 2 Cont 89.4 80.1
## 3 Cont 91.8 86.4
## 4 Cont 74.0 86.3
## 5 Cont 78.1 76.1
## 6 Cont 88.3 78.1
# Inspect the structure of the anorexia data.
str(anorexia)
## 'data.frame': 72 obs. of 3 variables:
## $ Treat : Factor w/ 3 levels "CBT","Cont","FT": 2 2 2 2 2 2 2 2 2 2 ...
## $ Prewt : num 80.7 89.4 91.8 74 78.1 88.3 87.3 75.1 80.6 78.4 ...
## $ Postwt: num 80.2 80.1 86.4 86.3 76.1 78.1 75.1 86.7 73.5 84.6 ...
# Weight change per patient: post-treatment minus pre-treatment weight.
anorexia[["WeightChange"]] <- with(anorexia, Postwt - Prewt)
# Box plot of the weight change for each treatment group.
ggplot(data = anorexia, mapping = aes(Treat, WeightChange, fill = Treat)) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    x = "Tratamiento",
    y = "Cambio de Peso (Postwt - Prewt)",
    title = "Cambio de Peso por Tratamiento en Pacientes Anorexia"
  )
# Label each patient by the sign of the weight change. A change of exactly
# zero gets its own "Sin Cambio" label; previously it fell into the else
# branch and was reported as a weight loss.
anorexia$WeightStatus <- ifelse(
  anorexia$WeightChange > 0,
  "Gano Peso",
  ifelse(anorexia$WeightChange < 0, "Perdio Peso", "Sin Cambio")
)
# Grouped bar chart: number of patients per treatment and weight status.
ggplot(anorexia, aes(x = Treat, fill = WeightStatus)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Numero de Pacientes que Ganaron o Perdieron Peso por Tratamiento",
    x = "Tratamiento",
    y = "Numero de Pacientes"
  ) +
  scale_fill_manual(
    values = c(
      "Gano Peso" = "#69b3a2",
      "Perdio Peso" = "#ff6961",
      "Sin Cambio" = "#b0b0b0"
    )
  ) +
  theme_minimal()