Install and load the packages UsingR and MASS.

#install.packages("UsingR")
#install.packages("MASS")
#install.packages("plotly")
#install.packages("ggplot2")
#install.packages("moments")
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Cargando paquete requerido: ggplot2
## 
## Adjuntando el paquete: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(MASS)
## 
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
library(ggplot2)
library(UsingR)
## Cargando paquete requerido: HistData
## Cargando paquete requerido: Hmisc
## 
## Adjuntando el paquete: 'Hmisc'
## The following object is masked from 'package:plotly':
## 
##     subplot
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(moments)

Brightness

Crear DataFrame

# Wrap the `brightness` data already in scope in a one-column data frame
# (column `brillo`) so it can be handed to ggplot2. Note that this rebinds the
# name `brightness` to the data frame.
brightness <- data.frame(brillo = brightness)
# Preview the first six rows.
head(brightness, n = 6L)
##   brillo
## 1   9.10
## 2   9.27
## 3   6.61
## 4   8.06
## 5   8.55
## 6  12.31

Grafico Histograma Brightness

# Histogram of star brightness on the density scale, overlaid with a kernel
# density estimate and a dashed vertical line at the sample mean.
grafico_brightness <- ggplot(brightness, aes(x = brillo)) +
  # after_stat(density) replaces the `..density..` notation deprecated in
  # ggplot2 3.4.0; it puts the bars on the same y scale as geom_density().
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 2, fill = "lightgreen", alpha = 0.6, color = "black"
  ) +
  geom_density(color = "#1f78b4", linewidth = 1, alpha = 0.2, fill = "red") +
  # Reference line at the mean brightness.
  geom_vline(
    xintercept = mean(brightness$brillo),
    color = "blue", linetype = "dashed"
  ) +
  labs(title = "Histograma y Grafico de Densidad del Brillo de Estrellas",
       x = "Brillo",
       y = "Densidad") +
  theme_minimal()
grafico_brightness
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Turn the static ggplot2 figure into an interactive plotly widget.
p_interactivo <- plotly::ggplotly(p = grafico_brightness)

# Auto-print the widget so it is rendered in the document.
p_interactivo

Graphically represent these data using a boxplot. Would you say that the data have “outliers”? What is the second smallest outlier?

# Base-graphics boxplot of the brightness data frame; points drawn beyond the
# whiskers are the candidate outliers discussed below.
boxplot(
  brightness,
  col = "lightgreen",
  ylab = "Brillo",
  main = "Boxplot del Brillo de Estrellas"
)

Valores Atipicos

# First and third quartiles, requested by probability instead of by position
# in the default five-value output of quantile().
Q1 <- quantile(brightness$brillo, probs = 0.25)
Q3 <- quantile(brightness$brillo, probs = 0.75)
# Interquartile range. Kept under its own name so that no object called `IQR`
# masks the stats::IQR() function in the workspace.
rango_iq <- Q3 - Q1

# Limits of the 1.5 x IQR rule: values beyond them are treated as outliers.
lower_bound <- Q1 - 1.5 * rango_iq
upper_bound <- Q3 + 1.5 * rango_iq

# Select the outliers from the numeric column directly (rather than indexing
# the whole data frame with a logical matrix).
es_atipico <- brightness$brillo < lower_bound | brightness$brillo > upper_bound
atipicos <- brightness$brillo[es_atipico]
print(c("Los valores atipicos son", atipicos))
##  [1] "Los valores atipicos son" "12.31"                   
##  [3] "11.71"                    "5.53"                    
##  [5] "11.28"                    "4.78"                    
##  [7] "5.13"                     "4.37"                    
##  [9] "5.04"                     "12.43"                   
## [11] "12.04"                    "4.55"                    
## [13] "11.55"                    "12.14"                   
## [15] "11.63"                    "4.99"                    
## [17] "11.67"                    "4.61"                    
## [19] "11.99"                    "12.04"                   
## [21] "5.55"                     "12.17"                   
## [23] "11.55"                    "11.79"                   
## [25] "12.19"                    "2.07"                    
## [27] "11.65"                    "11.73"                   
## [29] "2.28"                     "5.42"                    
## [31] "3.88"                     "5.54"                    
## [33] "5.29"                     "5.01"                    
## [35] "11.55"                    "4.89"                    
## [37] "11.8"                     "5.41"                    
## [39] "5.24"
# Outliers in ascending order (the default direction of sort()).
atipico_dos <- sort(atipicos)
# The second element of the ordered vector is the second-smallest outlier.
print(paste("El segundo menor valor atipico es:", atipico_dos[2], sep = " "))
## [1] "El segundo menor valor atipico es: 2.28"

We want to keep data that cannot be considered outliers. Create a new variable called brightness.without containing only the values without outliers.

# Flag the observations that lie inside [lower_bound, upper_bound].
dentro_limites <- brightness$brillo >= lower_bound &
  brightness$brillo <= upper_bound
# `brightness.without` is the variable name the exercise statement asks for.
brightness.without <- brightness$brillo[dentro_limites]
# The original name is kept as an alias so existing references keep working.
datos_sin_atipicos <- brightness.without
datos_sin_atipicos
##   [1]  9.10  9.27  6.61  8.06  8.55  9.64  9.05  8.59  8.59  7.34  8.43  8.80
##  [13]  7.25  8.60  8.15 11.03  6.53  8.51  7.55  8.69  7.57  9.05  6.28  9.13
##  [25]  9.32  8.83  9.14  8.26  7.63  9.09  8.10  6.43  9.07  7.68 10.44  8.65
##  [37]  7.46  8.70 10.61  8.20  6.18  7.91  9.59  8.57 10.78  7.31  9.53  6.49
##  [49]  8.94  8.56 10.96 10.57  7.40  8.12  8.27  7.05  9.09  8.34  8.86  8.27
##  [61]  6.36  8.08 11.00  8.55  7.83  8.79  8.33 10.42  8.26  8.97  6.90  9.93
##  [73]  7.42  9.03  8.41  8.06  8.69  8.40  8.57  9.50  8.85  9.61 10.62  8.05
##  [85]  7.80  5.71  7.87  7.64  7.66  8.68  8.12 10.10  8.67 10.46  9.87  9.48
##  [97]  7.04  8.44  9.88  7.05  8.29  9.34  7.73  6.22  8.53  7.23  8.61 10.76
## [109]  8.93  7.95  7.46  8.60  8.55  9.20  6.82  8.29  6.83  7.21  5.58  8.70
## [121]  8.06 10.86  6.50  9.32  9.14  8.13 10.62  6.62  9.96  8.64  6.60  6.25
## [133]  7.83 10.03  9.04  8.47  7.33  8.66 10.35  8.96  8.49 11.26  8.15  7.04
## [145] 10.02  8.90  7.78  9.93  8.60  8.51  7.09  6.93  8.68  8.98  9.84  8.98
## [157]  7.98 10.16  8.86  8.58  9.56  9.24  9.63  5.80  9.05  8.45  8.86  7.84
## [169]  8.86  8.93  7.97  6.90  8.47  6.77  8.55  8.48  8.53  6.33  8.99  8.64
## [181]  9.55  8.74  8.16  9.46  5.70  7.62  8.95  8.97  8.94  7.24 10.32  8.24
## [193]  8.62  9.18  8.53  8.54  8.56  9.41  5.87  7.20  9.05  9.52 10.24  7.70
## [205]  8.17  7.29  9.26  7.94  8.42  8.56  7.52  7.74  8.85  9.01  7.17  9.04
## [217] 10.30  9.86  7.64  8.27  8.44  9.58  8.43  8.49  9.64  9.17  8.09  9.00
## [229]  6.25  8.56 10.81  8.76  7.76  7.82  7.90  8.52  9.73  9.19  8.10  8.75
## [241]  8.14  8.65 10.30  6.46  6.73  7.96  9.53  8.87  6.59  8.65  9.64  9.15
## [253]  9.04  8.42  8.09  9.06  8.09  8.18  8.77  7.36  9.16  8.82 11.14  6.24
## [265]  9.44  7.49  6.96  7.94  8.69  8.15  8.45  7.92  7.45  9.01  8.55  9.23
## [277]  9.16  7.90  8.68  7.78  8.21  8.11  8.29  7.89  9.67  8.24  6.80  8.18
## [289]  8.44  7.45  6.31  8.15  8.27  7.66  8.59  7.09  8.54  9.58  8.44  8.59
## [301]  8.01  8.29  9.62  7.26  7.91  9.45  8.19  8.93  7.65  8.53  7.38  8.56
## [313]  8.76  9.56  7.09  9.83  5.90 10.80  8.41  9.05  8.79  8.88  7.59  9.60
## [325] 10.66  8.55  8.11  9.44  9.60  5.78 10.66  6.38  8.80  7.79  8.60  7.77
## [337] 10.37  9.80 10.42  9.22  8.43  7.33  8.93  9.09  9.26  8.73  9.18  8.12
## [349]  9.26  8.94  6.11  9.13  7.90  9.34  7.13 10.82  7.46  8.72  7.02  9.08
## [361]  8.37  5.59  7.37  5.68  8.56  8.72  9.06  8.82  8.18  9.39  9.10  8.46
## [373]  9.15  8.28  8.18  7.93  9.21  6.09  8.31  7.83  8.72  6.61  6.25  7.82
## [385]  8.66  8.15  8.97  8.15  7.47  8.63  8.13  8.23  8.41  6.47  9.83  8.64
## [397]  7.73  8.64  8.94  8.84  6.32  5.80  8.97  7.53  7.41  7.80  8.14  6.71
## [409]  8.73  9.37  8.69  9.95  7.10  8.09  6.88  9.48  9.04  9.30  8.49  8.30
## [421]  7.95  7.08  6.93  8.38  8.56  8.78  7.42  8.26  7.71  6.91  9.16  8.99
## [433]  8.63  9.90  7.59  7.39  7.78  7.47  6.97  8.82  9.13  7.86  7.13  9.45
## [445]  8.78  7.23  9.73  7.36  7.36  8.47  9.37  6.99  8.20  8.36  8.22  9.91
## [457]  9.67  8.60 10.07 10.15  7.75  9.21  9.66  8.47  9.37  9.44  9.99 10.38
## [469]  7.51  8.91  7.45  9.57  8.99  8.58  6.90  7.55  7.93  9.71  9.57  8.55
## [481]  6.62  7.89  7.51  7.36  8.66  8.51  6.65  9.67  7.80  8.21  7.90  8.94
## [493]  9.82  8.69  8.57  8.89  5.98  7.92  7.60  8.22  5.70  8.75  6.93  7.97
## [505]  8.06 10.13  7.31  8.35  5.57  9.85  9.16  9.03 10.07  9.76  9.35 10.95
## [517]  8.87  6.68  9.69  8.05 10.30  6.07  8.51  7.71  8.56  8.26  8.62 10.92
## [529] 10.51  9.83  9.84  9.74  8.21  8.72  8.03  9.00  6.19  8.22  7.93 10.18
## [541]  8.98  9.13  6.91  8.79  8.23 10.24  8.83  7.62  8.96 10.41  8.97  9.61
## [553]  8.29  8.30  8.26  7.44  9.52  8.20  8.68  8.65 10.52  8.41  9.18  8.42
## [565]  8.86  7.92 10.97  8.85  9.31 10.28  7.56  7.88  7.99  8.23  8.52  9.14
## [577]  6.20  7.64  8.95  7.48  7.06  7.33  8.98  8.24  8.53  8.40  7.48  8.46
## [589]  9.29  8.57  8.70  8.50  8.37  6.87  7.50  7.39  8.19  7.56  8.37  7.39
## [601]  6.73  8.66  8.25  8.47  8.01  6.83  9.06  8.79  7.44  6.43  5.93  8.85
## [613]  9.86  8.55  7.66  7.82  9.08 10.10  8.21  8.85  7.79  7.58  7.85  7.18
## [625]  7.54  9.72  7.12  9.77  8.84  5.67  8.15  9.61  8.19  7.27  8.51  8.36
## [637] 10.00  8.74  6.18 10.26 10.16  8.31  8.58  7.04  8.81  5.99  8.22  9.86
## [649]  8.00  9.40  9.10  8.11  8.89  9.43  7.59  8.72  9.86  9.23  9.50 10.73
## [661]  7.59  7.41  9.26  7.78  7.76  8.94  8.95  6.41  6.11  7.76  7.38  6.21
## [673]  7.05  7.44  8.50  7.84 11.01  7.88  9.10  8.65  8.41  7.81  7.43  8.76
## [685]  7.58  9.55  6.82 10.24  6.24  7.31 10.52  9.27  7.13  9.14  8.48  8.57
## [697]  7.21  9.05  7.72  8.03  6.47  5.57  6.32  7.78  8.58 10.37  9.23  9.20
## [709]  6.93  9.32  7.11  9.79  8.21  8.42  7.05  9.26  8.77  9.25  9.30 10.63
## [721]  9.90  9.89  9.33  7.78  7.02 11.26  8.89  9.60  7.07  6.01  9.11  8.24
## [733]  8.97  8.59  7.17  7.94  7.27  9.59  7.94  8.52  7.59  9.17  8.08  9.80
## [745]  8.92  9.91  9.42  8.84 10.15  8.37  9.33  9.35  7.40  8.35  9.53  9.59
## [757] 10.05  8.57  8.48  8.43  8.45  8.84 11.18  8.64  8.42  6.34  7.93  8.36
## [769]  8.32  7.77  6.84  8.78  7.19  8.50  8.82  9.04  7.93  7.66 10.07  9.03
## [781]  8.13  7.51  9.08  7.10  7.88  9.40  9.06  8.38 10.65  7.77  8.50  8.61
## [793] 10.05  8.71  9.37  6.97  8.56  9.34  9.47  8.11  8.91  7.83  8.95  7.20
## [805]  9.37  5.84  9.81  9.27  9.50  9.32  8.92  8.38  7.74  8.60  9.49  8.35
## [817]  7.11  9.87  8.98  7.75  8.24  6.74  6.83  7.70  6.70  8.67  9.94  8.73
## [829]  9.63  6.66  8.29  8.47  8.16  8.97  7.51  8.97  8.55  5.84  7.85  8.68
## [841]  8.05  8.27  7.68  9.40  7.77  6.89  7.55  8.27  8.16  8.07  7.91  7.71
## [853] 10.16  8.41  8.88  9.64  7.93  7.78  8.90  8.55  9.15 10.86  9.08  7.44
## [865] 10.35  6.68  8.85  8.90  8.24  6.74 10.75  8.44  7.69  8.88  7.70  8.60
## [877]  8.44  9.50  9.03  7.15  7.95  8.23  9.81  8.48  9.33  8.97  8.08  7.47
## [889]  8.34  7.75  8.34  7.56  6.93 10.03  8.69  9.04  8.32  7.85  7.21  8.98
## [901]  7.09  8.85  9.21  8.61  7.91  7.47  8.65  8.53  9.92  8.09  7.06  8.45
## [913]  8.73  7.45  9.02  7.51  7.32  8.17  9.45  9.72  9.34  8.75  9.32  7.91
## [925]  7.49  6.53  6.18  8.69

Describe the shape of the distribution, any skewness, and the presence of any modes.

library(ggplot2)
library(moments)

# Summary statistics of the brightness values.
media <- mean(brightness$brillo)
mediana <- median(brightness$brillo)
# Mode: the most frequent value. table() keeps the values as character labels,
# so the winning label is converted back to a number.
frecuencias <- sort(table(brightness$brillo), decreasing = TRUE)
moda <- as.numeric(names(frecuencias)[1])
desv_std <- sd(brightness$brillo)
rango <- range(brightness$brillo)
# Namespace-qualified so the call is unambiguous even if an object named
# `IQR` exists in the workspace.
iqr <- stats::IQR(brightness$brillo)
# Shape measures from the moments package. kurtosis() here is Pearson's
# measure (a normal distribution scores 3), not excess kurtosis.
asimetria <- skewness(brightness$brillo)
curtosis <- kurtosis(brightness$brillo)

# Print each statistic computed above; the `##` lines are the knitted output.
cat("Media:", media, "\n")
## Media: 8.417743
cat("Mediana:", mediana, "\n")
## Mediana: 8.5
cat("Moda:", moda, "\n")
## Moda: 8.55
cat("Desviación estándar:", desv_std, "\n")
## Desviación estándar: 1.294231
cat("Rango:", rango, "\n")
## Rango: 2.07 12.43
cat("IQR:", iqr, "\n")
## IQR: 1.4275
cat("Asimetría:", asimetria, "\n")
## Asimetría: -0.2601868
cat("Curtosis:", curtosis, "\n")
## Curtosis: 4.550829

Tendencia Central

Media cercana a la mediana: El hecho de que la media (8.42) esté muy cerca de la mediana (8.50) sugiere que la distribución es aproximadamente simétrica. Es decir, los datos no están fuertemente sesgados hacia valores altos o bajos.

Moda: La moda (8.55) está muy próxima a la media y a la mediana. Esto indica que los datos se concentran en torno a un único pico central, por lo que no hay indicios de que la distribución sea bimodal.

Dispersión

Desviación estándar: Un valor de 1.29 indica una dispersión moderada alrededor de la media (8.42). Los datos están relativamente dispersos, pero no extremadamente.

Rango e IQR: El rango (2.07 - 12.43) y el IQR (1.43) nos dan una idea de la extensión de los datos. El rango indica el alcance total de los valores, mientras que el IQR nos dice la amplitud de los valores centrales (entre el primer y el tercer cuartil).

Forma

Asimetría: Un valor de asimetría relativamente cercano a cero (-0.26) es coherente con la observación inicial de que la distribución es aproximadamente simétrica. El signo negativo indica una leve asimetría negativa (cola algo más larga hacia valores bajos), lo que concuerda con que la media (8.42) sea ligeramente menor que la mediana (8.50).

Curtosis: Un valor de curtosis de 4.55 indica que la distribución es más apuntada (leptocúrtica) que una distribución normal (que tiene una curtosis de 3). Esto significa que hay una concentración relativamente mayor de datos cerca de la media y en las colas, en comparación con una distribución normal, lo que concuerda con los valores atípicos detectados a ambos lados en el boxplot.

UScereal

Crear DataFrame

# Rebind the cereal data set as a plain data frame under the same name.
UScereal <- data.frame(UScereal)
# Preview the first six rows.
head(UScereal, n = 6L)
##                           mfr calories   protein      fat   sodium     fibre
## 100% Bran                   N 212.1212 12.121212 3.030303 393.9394 30.303030
## All-Bran                    K 212.1212 12.121212 3.030303 787.8788 27.272727
## All-Bran with Extra Fiber   K 100.0000  8.000000 0.000000 280.0000 28.000000
## Apple Cinnamon Cheerios     G 146.6667  2.666667 2.666667 240.0000  2.000000
## Apple Jacks                 K 110.0000  2.000000 0.000000 125.0000  1.000000
## Basic 4                     G 173.3333  4.000000 2.666667 280.0000  2.666667
##                              carbo   sugars shelf potassium vitamins
## 100% Bran                 15.15152 18.18182     3 848.48485 enriched
## All-Bran                  21.21212 15.15151     3 969.69697 enriched
## All-Bran with Extra Fiber 16.00000  0.00000     3 660.00000 enriched
## Apple Cinnamon Cheerios   14.00000 13.33333     1  93.33333 enriched
## Apple Jacks               11.00000 14.00000     2  30.00000 enriched
## Basic 4                   24.00000 10.66667     3 133.33333 enriched
colnames(UScereal)
##  [1] "mfr"       "calories"  "protein"   "fat"       "sodium"    "fibre"    
##  [7] "carbo"     "sugars"    "shelf"     "potassium" "vitamins"

Manufacturer y Shelf

# Keep only manufacturer and shelf. select() is namespace-qualified because
# MASS, attached after dplyr, masks dplyr's select().
UScereal1 <- UScereal %>% dplyr::select(mfr, shelf)
# Treat the shelf number as a categorical variable.
UScereal1$shelf <- factor(UScereal1$shelf)
# Distinct shelf levels, printed for inspection.
valores_shelf <- levels(UScereal1$shelf)
valores_shelf
## [1] "1" "2" "3"
head(UScereal1)
##                           mfr shelf
## 100% Bran                   N     3
## All-Bran                    K     3
## All-Bran with Extra Fiber   K     3
## Apple Cinnamon Cheerios     G     1
## Apple Jacks                 K     2
## Basic 4                     G     3
# Grouped bar chart: number of cereals per manufacturer, one bar per shelf.
ggplot(UScereal1, aes(x = mfr, fill = factor(shelf))) +
  geom_bar(position = "dodge") +  # "dodge" places the shelf bars side by side
  # Count labels above each bar. after_stat(count) replaces the `..count..`
  # notation deprecated in ggplot2 3.4.0; the dodge width keeps each label
  # aligned with its bar.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    position = position_dodge(width = 0.9),
    vjust = -0.5
  ) +
  labs(title = "Fabricantes por Estantes",
       x = "Fabricante",
       y = "Cantidad",
       fill = "Estantes") +
  theme_minimal()

Fat y Vitamins

# Subset with fat content and vitamin level; select() is namespace-qualified
# because MASS masks dplyr's select().
UScereal2 <- UScereal %>% dplyr::select(fat, vitamins)
# Print the full subset.
UScereal2
##                                             fat vitamins
## 100% Bran                             3.0303030 enriched
## All-Bran                              3.0303030 enriched
## All-Bran with Extra Fiber             0.0000000 enriched
## Apple Cinnamon Cheerios               2.6666667 enriched
## Apple Jacks                           0.0000000 enriched
## Basic 4                               2.6666667 enriched
## Bran Chex                             1.4925373 enriched
## Bran Flakes                           0.0000000 enriched
## Cap'n'Crunch                          2.6666667 enriched
## Cheerios                              1.6000000 enriched
## Cinnamon Toast Crunch                 4.0000000 enriched
## Clusters                              4.0000000 enriched
## Cocoa Puffs                           1.0000000 enriched
## Corn Chex                             0.0000000 enriched
## Corn Flakes                           0.0000000 enriched
## Corn Pops                             0.0000000 enriched
## Count Chocula                         1.0000000 enriched
## Cracklin' Oat Bran                    6.0000000 enriched
## Crispix                               0.0000000 enriched
## Crispy Wheat & Raisins                1.3333333 enriched
## Double Chex                           0.0000000 enriched
## Froot Loops                           1.0000000 enriched
## Frosted Flakes                        0.0000000 enriched
## Frosted Mini-Wheats                   0.0000000 enriched
## Fruit & Fibre: Dates Walnuts and Oats 2.9850746 enriched
## Fruitful Bran                         0.0000000 enriched
## Fruity Pebbles                        1.3333333 enriched
## Golden Crisp                          0.0000000 enriched
## Golden Grahams                        1.3333333 enriched
## Grape Nuts Flakes                     1.1363636 enriched
## Grape-Nuts                            0.0000000 enriched
## Great Grains Pecan                    9.0909091 enriched
## Honey Graham Ohs                      2.0000000 enriched
## Honey Nut Cheerios                    1.3333333 enriched
## Honey-comb                            0.0000000 enriched
## Just Right Fruit & Nut                1.3333333     100%
## Kix                                   0.6666667 enriched
## Life                                  2.9850746 enriched
## Lucky Charms                          1.0000000 enriched
## Mueslix Crispy Blend                  2.9850746 enriched
## Multi-Grain Cheerios                  1.0000000 enriched
## Nut&Honey Crunch                      1.4925373 enriched
## Nutri-Grain Almond-Raisin             2.9850746 enriched
## Oatmeal Raisin Crisp                  4.0000000 enriched
## Post Nat. Raisin Bran                 1.4925373 enriched
## Product 19                            0.0000000     100%
## Puffed Rice                           0.0000000     none
## Quaker Oat Squares                    2.0000000 enriched
## Raisin Bran                           1.3333333 enriched
## Raisin Nut Bran                       4.0000000 enriched
## Raisin Squares                        0.0000000 enriched
## Rice Chex                             0.0000000 enriched
## Rice Krispies                         0.0000000 enriched
## Shredded Wheat 'n'Bran                0.0000000     none
## Shredded Wheat spoon size             0.0000000     none
## Smacks                                1.3333333 enriched
## Special K                             0.0000000 enriched
## Total Corn Flakes                     1.0000000     100%
## Total Raisin Bran                     1.0000000     100%
## Total Whole Grain                     1.0000000     100%
## Triples                               1.3333333 enriched
## Trix                                  1.0000000 enriched
## Wheat Chex                            1.4925373 enriched
## Wheaties                              1.0000000 enriched
## Wheaties Honey Gold                   1.3333333 enriched
str(UScereal2)
## 'data.frame':    65 obs. of  2 variables:
##  $ fat     : num  3.03 3.03 0 2.67 0 ...
##  $ vitamins: Factor w/ 3 levels "100%","enriched",..: 2 2 2 2 2 2 2 2 2 2 ...
# Box plot of fat content for each vitamin level.
ggplot(data = UScereal2, mapping = aes(x = vitamins, y = fat)) +
  geom_boxplot(fill = "#69b3a2") +
  theme_minimal() +
  labs(
    title = "Distribucion de Grasa por Nivel de Vitaminas",
    x = "Nivel de Vitaminas",
    y = "Grasa (g)"
  )

# Violin plot of the same comparison, showing the shape of each distribution.
ggplot(data = UScereal2, mapping = aes(x = vitamins, y = fat)) +
  geom_violin(colour = "black", fill = "#69b3a2") +
  theme_minimal() +
  labs(
    title = "Distribucion de Grasa por Nivel de Vitaminas",
    x = "Nivel de Vitaminas",
    y = "Grasa (g)"
  )

# Bar chart of the mean fat content per vitamin level. The legend is hidden
# because the fill colour only repeats the x axis.
ggplot(
  data = UScereal2,
  mapping = aes(x = vitamins, y = fat, fill = vitamins)
) +
  stat_summary(geom = "bar", fun = "mean") +
  labs(
    title = "Media de Grasa por Nivel de Vitaminas",
    x = "Nivel de Vitaminas",
    y = "Media de Grasa (g)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# Histogram of fat content, one panel per vitamin level. Each panel has free
# axes, so bar heights are not directly comparable across panels.
ggplot(UScereal2, aes(x = fat, fill = vitamins)) +
  geom_histogram(binwidth = 1, color = "black", alpha = 0.7) +
  facet_wrap(~ vitamins, scales = "free") +
  # `fill` gives the legend a readable title instead of the raw column name,
  # in line with the other labelled legends in this report.
  labs(title = "Histograma de Grasa por Nivel de Vitaminas",
       x = "Grasa (g)",
       y = "Frecuencia",
       fill = "Nivel de Vitaminas") +
  theme_minimal()

Fat y Shelf

# Subset with fat content and shelf number; select() is namespace-qualified
# because MASS masks dplyr's select().
UScereal3 <- UScereal %>% dplyr::select(fat, shelf)
# Print the full subset.
UScereal3
##                                             fat shelf
## 100% Bran                             3.0303030     3
## All-Bran                              3.0303030     3
## All-Bran with Extra Fiber             0.0000000     3
## Apple Cinnamon Cheerios               2.6666667     1
## Apple Jacks                           0.0000000     2
## Basic 4                               2.6666667     3
## Bran Chex                             1.4925373     1
## Bran Flakes                           0.0000000     3
## Cap'n'Crunch                          2.6666667     2
## Cheerios                              1.6000000     1
## Cinnamon Toast Crunch                 4.0000000     2
## Clusters                              4.0000000     3
## Cocoa Puffs                           1.0000000     2
## Corn Chex                             0.0000000     1
## Corn Flakes                           0.0000000     1
## Corn Pops                             0.0000000     2
## Count Chocula                         1.0000000     2
## Cracklin' Oat Bran                    6.0000000     3
## Crispix                               0.0000000     3
## Crispy Wheat & Raisins                1.3333333     3
## Double Chex                           0.0000000     3
## Froot Loops                           1.0000000     2
## Frosted Flakes                        0.0000000     1
## Frosted Mini-Wheats                   0.0000000     2
## Fruit & Fibre: Dates Walnuts and Oats 2.9850746     3
## Fruitful Bran                         0.0000000     3
## Fruity Pebbles                        1.3333333     2
## Golden Crisp                          0.0000000     1
## Golden Grahams                        1.3333333     2
## Grape Nuts Flakes                     1.1363636     3
## Grape-Nuts                            0.0000000     3
## Great Grains Pecan                    9.0909091     3
## Honey Graham Ohs                      2.0000000     2
## Honey Nut Cheerios                    1.3333333     1
## Honey-comb                            0.0000000     1
## Just Right Fruit & Nut                1.3333333     3
## Kix                                   0.6666667     2
## Life                                  2.9850746     2
## Lucky Charms                          1.0000000     2
## Mueslix Crispy Blend                  2.9850746     3
## Multi-Grain Cheerios                  1.0000000     1
## Nut&Honey Crunch                      1.4925373     2
## Nutri-Grain Almond-Raisin             2.9850746     3
## Oatmeal Raisin Crisp                  4.0000000     3
## Post Nat. Raisin Bran                 1.4925373     3
## Product 19                            0.0000000     3
## Puffed Rice                           0.0000000     3
## Quaker Oat Squares                    2.0000000     3
## Raisin Bran                           1.3333333     2
## Raisin Nut Bran                       4.0000000     3
## Raisin Squares                        0.0000000     3
## Rice Chex                             0.0000000     1
## Rice Krispies                         0.0000000     1
## Shredded Wheat 'n'Bran                0.0000000     1
## Shredded Wheat spoon size             0.0000000     1
## Smacks                                1.3333333     2
## Special K                             0.0000000     1
## Total Corn Flakes                     1.0000000     3
## Total Raisin Bran                     1.0000000     3
## Total Whole Grain                     1.0000000     3
## Triples                               1.3333333     3
## Trix                                  1.0000000     2
## Wheat Chex                            1.4925373     1
## Wheaties                              1.0000000     1
## Wheaties Honey Gold                   1.3333333     1
str(UScereal3)
## 'data.frame':    65 obs. of  2 variables:
##  $ fat  : num  3.03 3.03 0 2.67 0 ...
##  $ shelf: int  3 3 3 1 2 3 1 3 2 1 ...
# Box plot of fat content per shelf; shelf is converted to a factor so each
# shelf gets its own box.
ggplot(data = UScereal3, mapping = aes(x = factor(shelf), y = fat)) +
  geom_boxplot(fill = "#69b3a2") +
  theme_minimal() +
  labs(
    title = "Distribucion de Grasa por Estante",
    x = "Estante",
    y = "Grasa (g)"
  )

# Bar chart of the mean fat content per shelf. The legend is hidden because
# the fill colour only repeats the x axis.
ggplot(
  data = UScereal3,
  mapping = aes(x = factor(shelf), y = fat, fill = factor(shelf))
) +
  stat_summary(geom = "bar", fun = "mean") +
  labs(
    title = "Media de Grasa por Estante",
    x = "Estante",
    y = "Media de Grasa (g)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# Scatter plot of fat against the (numeric) shelf number.
ggplot(data = UScereal3, mapping = aes(x = shelf, y = fat)) +
  geom_point(colour = "#69b3a2") +
  theme_minimal() +
  labs(
    title = "Grasa por Estante",
    x = "Estante",
    y = "Grasa (g)"
  )

# Histogram of fat content, one panel per shelf. Each panel has free axes, so
# bar heights are not directly comparable across panels.
ggplot(UScereal3, aes(x = fat, fill = factor(shelf))) +
  geom_histogram(binwidth = 1, color = "black", alpha = 0.7) +
  facet_wrap(~ shelf, scales = "free") +
  # Without an explicit `fill` label the legend title would be the raw
  # expression `factor(shelf)`.
  labs(title = "Histograma de Grasa por Estante",
       x = "Grasa (g)",
       y = "Frecuencia",
       fill = "Estante") +
  theme_minimal()

Carbohydrates y Sugars

head(UScereal)
##                           mfr calories   protein      fat   sodium     fibre
## 100% Bran                   N 212.1212 12.121212 3.030303 393.9394 30.303030
## All-Bran                    K 212.1212 12.121212 3.030303 787.8788 27.272727
## All-Bran with Extra Fiber   K 100.0000  8.000000 0.000000 280.0000 28.000000
## Apple Cinnamon Cheerios     G 146.6667  2.666667 2.666667 240.0000  2.000000
## Apple Jacks                 K 110.0000  2.000000 0.000000 125.0000  1.000000
## Basic 4                     G 173.3333  4.000000 2.666667 280.0000  2.666667
##                              carbo   sugars shelf potassium vitamins
## 100% Bran                 15.15152 18.18182     3 848.48485 enriched
## All-Bran                  21.21212 15.15151     3 969.69697 enriched
## All-Bran with Extra Fiber 16.00000  0.00000     3 660.00000 enriched
## Apple Cinnamon Cheerios   14.00000 13.33333     1  93.33333 enriched
## Apple Jacks               11.00000 14.00000     2  30.00000 enriched
## Basic 4                   24.00000 10.66667     3 133.33333 enriched
# Subset with carbohydrates and sugars; select() is namespace-qualified
# because MASS masks dplyr's select().
UScereal4 <- UScereal %>% dplyr::select(carbo, sugars)
# Print the full subset.
UScereal4
##                                          carbo    sugars
## 100% Bran                             15.15152 18.181818
## All-Bran                              21.21212 15.151515
## All-Bran with Extra Fiber             16.00000  0.000000
## Apple Cinnamon Cheerios               14.00000 13.333333
## Apple Jacks                           11.00000 14.000000
## Basic 4                               24.00000 10.666667
## Bran Chex                             22.38806  8.955224
## Bran Flakes                           19.40299  7.462687
## Cap'n'Crunch                          16.00000 16.000000
## Cheerios                              13.60000  0.800000
## Cinnamon Toast Crunch                 17.33333 12.000000
## Clusters                              26.00000 14.000000
## Cocoa Puffs                           12.00000 13.000000
## Corn Chex                             22.00000  3.000000
## Corn Flakes                           21.00000  2.000000
## Corn Pops                             13.00000 12.000000
## Count Chocula                         12.00000 13.000000
## Cracklin' Oat Bran                    20.00000 14.000000
## Crispix                               21.00000  3.000000
## Crispy Wheat & Raisins                14.66667 13.333333
## Double Chex                           24.00000  6.666667
## Froot Loops                           11.00000 13.000000
## Frosted Flakes                        18.66667 14.666667
## Frosted Mini-Wheats                   17.50000  8.750000
## Fruit & Fibre: Dates Walnuts and Oats 17.91045 14.925373
## Fruitful Bran                         20.89552 17.910448
## Fruity Pebbles                        17.33333 16.000000
## Golden Crisp                          12.50000 17.045455
## Golden Grahams                        20.00000 12.000000
## Grape Nuts Flakes                     17.04545  5.681818
## Grape-Nuts                            68.00000 12.000000
## Great Grains Pecan                    39.39394 12.121212
## Honey Graham Ohs                      12.00000 11.000000
## Honey Nut Cheerios                    15.33333 13.333333
## Honey-comb                            10.52632  8.270677
## Just Right Fruit & Nut                26.66667 12.000000
## Kix                                   14.00000  2.000000
## Life                                  17.91045  8.955224
## Lucky Charms                          12.00000 12.000000
## Mueslix Crispy Blend                  25.37313 19.402985
## Multi-Grain Cheerios                  15.00000  6.000000
## Nut&Honey Crunch                      22.38806 13.432836
## Nutri-Grain Almond-Raisin             31.34328 10.447761
## Oatmeal Raisin Crisp                  27.00000 20.000000
## Post Nat. Raisin Bran                 16.41791 20.895522
## Product 19                            20.00000  3.000000
## Puffed Rice                           13.00000  0.000000
## Quaker Oat Squares                    28.00000 12.000000
## Raisin Bran                           18.66667 16.000000
## Raisin Nut Bran                       21.00000 16.000000
## Raisin Squares                        30.00000 12.000000
## Rice Chex                             20.35398  1.769912
## Rice Krispies                         22.00000  3.000000
## Shredded Wheat 'n'Bran                28.35821  0.000000
## Shredded Wheat spoon size             29.85075  0.000000
## Smacks                                12.00000 20.000000
## Special K                             16.00000  3.000000
## Total Corn Flakes                     21.00000  3.000000
## Total Raisin Bran                     15.00000 14.000000
## Total Whole Grain                     16.00000  3.000000
## Triples                               28.00000  4.000000
## Trix                                  13.00000 12.000000
## Wheat Chex                            25.37313  4.477612
## Wheaties                              17.00000  3.000000
## Wheaties Honey Gold                   21.33333 10.666667
str(UScereal4)
## 'data.frame':    65 obs. of  2 variables:
##  $ carbo : num  15.2 21.2 16 14 11 ...
##  $ sugars: num  18.2 15.2 0 13.3 14 ...
# Scatter plot of sugars against carbohydrates.
ggplot(data = UScereal4, mapping = aes(x = carbo, y = sugars)) +
  geom_point(colour = "#69b3a2") +
  theme_minimal() +
  labs(
    title = "Relacion entre Carbohidratos y Azucares",
    x = "Carbohidratos (g)",
    y = "Azucares (g)"
  )

# Contour lines of the joint 2-D density of the two variables.
ggplot(data = UScereal4, mapping = aes(x = carbo, y = sugars)) +
  geom_density_2d() +
  theme_minimal() +
  labs(
    title = "Densidad Conjunta de Carbohidratos y Azucares",
    x = "Carbohidratos (g)",
    y = "Azucares (g)"
  )

# The same scatter plot split into panels, one per carbohydrate interval
# (cut() divides the carbohydrate range into four intervals).
ggplot(data = UScereal4, mapping = aes(x = carbo, y = sugars)) +
  geom_point(colour = "#69b3a2") +
  facet_wrap(~ cut(carbo, breaks = 4)) +
  theme_minimal() +
  labs(
    title = "Relacion entre Carbohidratos y Azucares por Rangos de Carbohidratos",
    x = "Carbohidratos (g)",
    y = "Azucares (g)"
  )

# Box plot of sugars within each of the four carbohydrate intervals.
ggplot(
  data = UScereal4,
  mapping = aes(x = cut(carbo, breaks = 4), y = sugars)
) +
  geom_boxplot(fill = "#69b3a2") +
  theme_minimal() +
  labs(
    title = "Distribucion de Azucares por Rangos de Carbohidratos",
    x = "Carbohidratos (g)",
    y = "Azucares (g)"
  )

Fibre y Manufacturer

# Keep only the fibre and mfr columns of UScereal. dplyr::select is called
# with its namespace because MASS masks select().
UScereal5 <- UScereal %>% dplyr::select(fibre, mfr)
print(UScereal5)
##                                           fibre mfr
## 100% Bran                             30.303030   N
## All-Bran                              27.272727   K
## All-Bran with Extra Fiber             28.000000   K
## Apple Cinnamon Cheerios                2.000000   G
## Apple Jacks                            1.000000   K
## Basic 4                                2.666667   G
## Bran Chex                              5.970149   R
## Bran Flakes                            7.462687   P
## Cap'n'Crunch                           0.000000   Q
## Cheerios                               1.600000   G
## Cinnamon Toast Crunch                  0.000000   G
## Clusters                               4.000000   G
## Cocoa Puffs                            0.000000   G
## Corn Chex                              0.000000   R
## Corn Flakes                            1.000000   K
## Corn Pops                              1.000000   K
## Count Chocula                          0.000000   G
## Cracklin' Oat Bran                     8.000000   K
## Crispix                                1.000000   K
## Crispy Wheat & Raisins                 2.666667   G
## Double Chex                            1.333333   R
## Froot Loops                            1.000000   K
## Frosted Flakes                         1.333333   K
## Frosted Mini-Wheats                    3.750000   K
## Fruit & Fibre: Dates Walnuts and Oats  7.462687   P
## Fruitful Bran                          7.462687   K
## Fruity Pebbles                         0.000000   P
## Golden Crisp                           0.000000   P
## Golden Grahams                         0.000000   G
## Grape Nuts Flakes                      3.409091   P
## Grape-Nuts                            12.000000   P
## Great Grains Pecan                     9.090909   P
## Honey Graham Ohs                       1.000000   Q
## Honey Nut Cheerios                     2.000000   G
## Honey-comb                             0.000000   P
## Just Right Fruit & Nut                 2.666667   K
## Kix                                    0.000000   G
## Life                                   2.985075   Q
## Lucky Charms                           0.000000   G
## Mueslix Crispy Blend                   4.477612   K
## Multi-Grain Cheerios                   2.000000   G
## Nut&Honey Crunch                       0.000000   K
## Nutri-Grain Almond-Raisin              4.477612   K
## Oatmeal Raisin Crisp                   3.000000   G
## Post Nat. Raisin Bran                  8.955224   P
## Product 19                             1.000000   K
## Puffed Rice                            0.000000   Q
## Quaker Oat Squares                     4.000000   Q
## Raisin Bran                            6.666667   K
## Raisin Nut Bran                        5.000000   G
## Raisin Squares                         4.000000   K
## Rice Chex                              0.000000   R
## Rice Krispies                          0.000000   K
## Shredded Wheat 'n'Bran                 5.970149   N
## Shredded Wheat spoon size              4.477612   N
## Smacks                                 1.333333   K
## Special K                              1.000000   K
## Total Corn Flakes                      0.000000   G
## Total Raisin Bran                      4.000000   G
## Total Whole Grain                      3.000000   G
## Triples                                0.000000   G
## Trix                                   0.000000   G
## Wheat Chex                             4.477612   R
## Wheaties                               3.000000   G
## Wheaties Honey Gold                    1.333333   G
# Compact structural summary (column names and types) of UScereal5.
utils::str(UScereal5)
## 'data.frame':    65 obs. of  2 variables:
##  $ fibre: num  30.3 27.3 28 2 1 ...
##  $ mfr  : Factor w/ 6 levels "G","K","N","P",..: 3 2 2 1 2 1 6 4 5 1 ...
# Box plot of fibre content, one box per manufacturer code.
ggplot(data = UScereal5, mapping = aes(mfr, fibre)) +
  geom_boxplot(fill = "#69b3a2") +
  ggtitle("Distribucion de Fibra por Fabricante") +
  xlab("Fabricante") +
  ylab("Fibra (g)") +
  theme_minimal()

# Mean fibre content per manufacturer (one row per level of mfr).
avg_fibre <- UScereal5 %>%
  group_by(mfr) %>%
  summarise(mean_fibre = mean(fibre))

# Bar chart of the per-manufacturer means, with the value printed above each
# bar. geom_col() is the direct form of geom_bar(stat = "identity").
# Labels are built with sprintf() so every label has exactly two decimals and
# no padding: format() on a whole vector pads the strings to a common width
# and lets the smallest value decide how many decimals all labels get.
ggplot(avg_fibre, aes(x = mfr, y = mean_fibre)) +
  geom_col(fill = "#4682B4") +
  geom_text(aes(label = sprintf("%.2f", mean_fibre)), vjust = -0.5, size = 3) +
  labs(title = "Fibra Promedio por Fabricante",
       x = "Fabricante",
       y = "Fibra Promedio (g)") +
  theme_minimal()

# Documentation lookup for format(), kept for interactive sessions only so
# that running or rendering the script unattended does not launch the help
# viewer.
if (interactive()) {
  help(format)
}
# Strip plot of fibre content per manufacturer.
# Points are spread horizontally only (height = 0), so the plotted fibre
# values are the real ones; geom_jitter(width = 0.2) would also apply its
# default vertical jitter. The fixed seed makes the figure identical on every
# run.
ggplot(UScereal5, aes(x = mfr, y = fibre)) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0, seed = 1),
    color = "#69b3a2"
  ) +
  labs(title = "Dispersion de la Fibra por Fabricante",
       x = "Fabricante",
       y = "Fibra (g)") +
  theme_minimal()

Sodium y Sugars

# Keep only the sodium and sugars columns of UScereal. dplyr::select is called
# with its namespace because MASS masks select().
UScereal6 <- UScereal %>% dplyr::select(sodium, sugars)
print(UScereal6)
##                                          sodium    sugars
## 100% Bran                             393.93939 18.181818
## All-Bran                              787.87879 15.151515
## All-Bran with Extra Fiber             280.00000  0.000000
## Apple Cinnamon Cheerios               240.00000 13.333333
## Apple Jacks                           125.00000 14.000000
## Basic 4                               280.00000 10.666667
## Bran Chex                             298.50746  8.955224
## Bran Flakes                           313.43284  7.462687
## Cap'n'Crunch                          293.33333 16.000000
## Cheerios                              232.00000  0.800000
## Cinnamon Toast Crunch                 280.00000 12.000000
## Clusters                              280.00000 14.000000
## Cocoa Puffs                           180.00000 13.000000
## Corn Chex                             280.00000  3.000000
## Corn Flakes                           290.00000  2.000000
## Corn Pops                              90.00000 12.000000
## Count Chocula                         180.00000 13.000000
## Cracklin' Oat Bran                    280.00000 14.000000
## Crispix                               220.00000  3.000000
## Crispy Wheat & Raisins                186.66667 13.333333
## Double Chex                           253.33333  6.666667
## Froot Loops                           125.00000 13.000000
## Frosted Flakes                        266.66667 14.666667
## Frosted Mini-Wheats                     0.00000  8.750000
## Fruit & Fibre: Dates Walnuts and Oats 238.80597 14.925373
## Fruitful Bran                         358.20896 17.910448
## Fruity Pebbles                        180.00000 16.000000
## Golden Crisp                           51.13636 17.045455
## Golden Grahams                        373.33333 12.000000
## Grape Nuts Flakes                     159.09091  5.681818
## Grape-Nuts                            680.00000 12.000000
## Great Grains Pecan                    227.27273 12.121212
## Honey Graham Ohs                      220.00000 11.000000
## Honey Nut Cheerios                    333.33333 13.333333
## Honey-comb                            135.33835  8.270677
## Just Right Fruit & Nut                226.66667 12.000000
## Kix                                   173.33333  2.000000
## Life                                  223.88060  8.955224
## Lucky Charms                          180.00000 12.000000
## Mueslix Crispy Blend                  223.88060 19.402985
## Multi-Grain Cheerios                  220.00000  6.000000
## Nut&Honey Crunch                      283.58209 13.432836
## Nutri-Grain Almond-Raisin             328.35821 10.447761
## Oatmeal Raisin Crisp                  340.00000 20.000000
## Post Nat. Raisin Bran                 298.50746 20.895522
## Product 19                            320.00000  3.000000
## Puffed Rice                             0.00000  0.000000
## Quaker Oat Squares                    270.00000 12.000000
## Raisin Bran                           280.00000 16.000000
## Raisin Nut Bran                       280.00000 16.000000
## Raisin Squares                          0.00000 12.000000
## Rice Chex                             212.38938  1.769912
## Rice Krispies                         290.00000  3.000000
## Shredded Wheat 'n'Bran                  0.00000  0.000000
## Shredded Wheat spoon size               0.00000  0.000000
## Smacks                                 93.33333 20.000000
## Special K                             230.00000  3.000000
## Total Corn Flakes                     200.00000  3.000000
## Total Raisin Bran                     190.00000 14.000000
## Total Whole Grain                     200.00000  3.000000
## Triples                               333.33333  4.000000
## Trix                                  140.00000 12.000000
## Wheat Chex                            343.28358  4.477612
## Wheaties                              200.00000  3.000000
## Wheaties Honey Gold                   266.66667 10.666667
# Compact structural summary (column names and types) of UScereal6.
utils::str(UScereal6)
## 'data.frame':    65 obs. of  2 variables:
##  $ sodium: num  394 788 280 240 125 ...
##  $ sugars: num  18.2 15.2 0 13.3 14 ...
# Scatter plot: sugars against sodium, one point per cereal.
ggplot(data = UScereal6, mapping = aes(sodium, sugars)) +
  geom_point(size = 3, colour = "#69b3a2") +
  ggtitle("Relacion entre Sodio y Azucares en Cereales") +
  xlab("Sodio (mg)") +
  ylab("Azucares (g)") +
  theme_minimal()

# Histogram of sodium using bins 50 units wide.
ggplot(data = UScereal6, mapping = aes(sodium)) +
  geom_histogram(binwidth = 50, colour = "black", fill = "#69b3a2") +
  ggtitle("Distribucion de Sodio en Cereales") +
  xlab("Sodio (mg)") +
  ylab("Frecuencia") +
  theme_minimal()

# Histogram of sugars using bins 2 units wide.
ggplot(data = UScereal6, mapping = aes(sugars)) +
  geom_histogram(binwidth = 2, colour = "black", fill = "#1f78b4") +
  ggtitle("Distribucion de Azucares en Cereales") +
  xlab("Azucares (g)") +
  ylab("Frecuencia") +
  theme_minimal()

Mammals

# Copy the mammals dataset into a data frame in the workspace and show its
# first six rows.
mammals <- mammals %>% data.frame()
head(mammals, n = 6)
##                    body brain
## Arctic fox        3.385  44.5
## Owl monkey        0.480  15.5
## Mountain beaver   1.350   8.1
## Cow             465.000 423.0
## Grey wolf        36.330 119.5
## Goat             27.660 115.0
# Scatter plot of brain weight against body weight on the original scale.
ggplot(data = mammals, mapping = aes(body, brain)) +
  geom_point(size = 3, colour = "#69b3a2") +
  ggtitle("Relacion entre Peso Corporal y Peso Cerebral en Mamiferos") +
  xlab("Peso Corporal (kg)") +
  ylab("Peso Cerebral (g)") +
  theme_minimal()

# Correlation (cor() default method) between the two untransformed columns.
correlation <- with(mammals, cor(body, brain))
print(paste("Correlación lineal: ", correlation, sep = " "))
## [1] "Correlación lineal:  0.934163842323355"
# Add natural-log versions of both weight columns to the data frame.
mammals[["log_body"]] <- log(mammals[["body"]])
mammals[["log_brain"]] <- log(mammals[["brain"]])

# Scatter plot of the log-transformed columns.
ggplot(data = mammals, mapping = aes(log_body, log_brain)) +
  geom_point(size = 3, colour = "#1f78b4") +
  ggtitle("Relacion Logaritmica entre Peso Corporal y Peso Cerebral") +
  xlab("Log(Peso Corporal) (kg)") +
  ylab("Log(Peso Cerebral) (g)") +
  theme_minimal()

# Correlation (cor() default method) between the log-transformed columns.
log_correlation <- with(mammals, cor(log_body, log_brain))
print(paste("Correlacion lineal despues de la transformacion logaritmica: ", log_correlation, sep = " "))
## [1] "Correlacion lineal despues de la transformacion logaritmica:  0.95957475837098"

Anorexia

# Copy the anorexia dataset into a data frame in the workspace and show its
# first six rows.
anorexia <- anorexia %>% data.frame()
head(anorexia, n = 6)
##   Treat Prewt Postwt
## 1  Cont  80.7   80.2
## 2  Cont  89.4   80.1
## 3  Cont  91.8   86.4
## 4  Cont  74.0   86.3
## 5  Cont  78.1   76.1
## 6  Cont  88.3   78.1
# Compact structural summary (column names and types) of anorexia.
utils::str(anorexia)
## 'data.frame':    72 obs. of  3 variables:
##  $ Treat : Factor w/ 3 levels "CBT","Cont","FT": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Prewt : num  80.7 89.4 91.8 74 78.1 88.3 87.3 75.1 80.6 78.4 ...
##  $ Postwt: num  80.2 80.1 86.4 86.3 76.1 78.1 75.1 86.7 73.5 84.6 ...
# Weight change per patient, computed as Postwt minus Prewt.
anorexia[["WeightChange"]] <- with(anorexia, Postwt - Prewt)

# Box plot of the weight change, one box (and fill colour) per treatment.
ggplot(data = anorexia, mapping = aes(Treat, WeightChange, fill = Treat)) +
  geom_boxplot() +
  ggtitle("Cambio de Peso por Tratamiento en Pacientes Anorexia") +
  xlab("Tratamiento") +
  ylab("Cambio de Peso (Postwt - Prewt)") +
  theme_minimal()

# Classify each patient by the sign of WeightChange. A change of exactly zero
# gets its own category ("Sin Cambio") instead of being counted as a loss,
# which is what a single `> 0` test would do.
anorexia$WeightStatus <- ifelse(
  anorexia$WeightChange > 0, "Gano Peso",
  ifelse(anorexia$WeightChange < 0, "Perdio Peso", "Sin Cambio")
)

# Bar chart: number of patients per treatment, one bar per weight status.
# preserve = "single" keeps every bar the same width even when a treatment
# has no patients in one of the categories.
ggplot(anorexia, aes(x = Treat, fill = WeightStatus)) +
  geom_bar(position = position_dodge(preserve = "single")) +
  labs(title = "Numero de Pacientes que Ganaron o Perdieron Peso por Tratamiento",
       x = "Tratamiento",
       y = "Numero de Pacientes") +
  scale_fill_manual(values = c("Gano Peso" = "#69b3a2",
                               "Perdio Peso" = "#ff6961",
                               "Sin Cambio" = "#b0b0b0")) +
  theme_minimal()