Taller Clase 08

#Instalar los paquetes

#install.packages("MASS")
#install.packages("UsingR")

#Cargar los paquetes

library("MASS")
library("UsingR")

## Cargando paquete requerido: HistData

## Cargando paquete requerido: Hmisc

## 
## Adjuntando el paquete: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

library("dplyr")

## 
## Adjuntando el paquete: 'dplyr'

## The following objects are masked from 'package:Hmisc':
## 
##     src, summarize

## The following object is masked from 'package:MASS':
## 
##     select

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library("ggplot2")
library("plotly")

## 
## Adjuntando el paquete: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:Hmisc':
## 
##     subplot

## The following object is masked from 'package:MASS':
## 
##     select

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Load the brightness data set

data("brightness")

# Number of brightness readings and their overall sum
Stars <- length(brightness)
Total <- sum(brightness)

Histogram

# Density-scaled histogram of the brightness values with a density curve
# drawn on top.
# `after_stat(density)` replaces the deprecated `..density..` notation, and
# line thickness is set with `linewidth` because `size` is deprecated for
# line geoms since ggplot2 3.4.0.
h <- ggplot(data = data.frame(x = brightness), aes(x = x)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1, color = "black", fill = "lightblue"
  ) +
  geom_density(color = "purple", linewidth = 1) +
  labs(title = "Histograma del Brillo de 966 Estrellas",
       x = "Brillo de las Estrellas",
       y = "Densidad") +
  theme_minimal()

# Convert the static plot into an interactive plotly widget and show it.
interactive_plot <- ggplotly(h)

interactive_plot

Esta base de datos tiene una distribución aproximadamente normal, lo que quiere decir que la moda, la media y la mediana tienden a coincidir. Se puede inferir que la mayoría de las estrellas tienen un brillo entre 7.5 y 9.5.

# Box plot of all brightness values.
# The values are mapped from a data-frame column rather than from the global
# vector, and the stray `outliers = TRUE` argument was dropped: passed to
# ggplot() it had no effect, and geom_boxplot() draws outliers by default.
boxplot_plot <- ggplot(data = data.frame(brightness = brightness), aes(y = brightness)) +
  geom_boxplot(fill = "lightblue", color = "black") +
  labs(title = "Boxplot Interactivo", y = "Brillo") +
  theme_minimal()

# Mark the mean.
# annotate() draws a single marker; a geom_point() layer with constant
# aesthetics would inherit the plot data and draw one point per row.
# annotate() also captures the current value of mean_brightness, which is
# reassigned later in the script.
# NOTE(review): with no x aesthetic the box is drawn around x = 0, so x = 1
# places the marker beside the box - confirm this placement is intended.
mean_brightness <- mean(brightness)
boxplot_plot <- boxplot_plot +
  annotate("point", x = 1, y = mean_brightness, color = "pink", shape = 5, size = 3) +
  annotate("text", x = 1.1, y = mean_brightness + 2, label = paste("Mean", round(mean_brightness, 2)), color = "pink")


# Convert the box plot into an interactive chart
interactive_boxplot <- ggplotly(boxplot_plot)

# Show the interactive chart
interactive_boxplot

Second smallest outlier is 2.28

-Calcular el diagrama sin outliers

# First and third quartiles of brightness, and the spread between them
Q1 <- quantile(brightness, probs = 0.25)
Q3 <- quantile(brightness, probs = 0.75)
IQR <- Q3 - Q1

# Fences located 1.5 interquartile ranges beyond each quartile
upper_bound <- Q3 + IQR * 1.5
lower_bound <- Q1 - IQR * 1.5

# Keep only the readings that do not fall outside either fence
brightness.without <- brightness[!(brightness < lower_bound | brightness > upper_bound)]

# Box plot of the brightness values that remain after outlier removal
boxplot_plot <- ggplot(data = data.frame(brightness.without), aes(y = brightness.without)) +
  geom_boxplot(fill = "lightblue", color = "black") +
  labs(title = "Boxplot sin Outliers", y = "Brillo") +
  theme_minimal()

# Compute and mark the mean of the filtered values.
# annotate() draws a single marker; a geom_point() layer with constant
# aesthetics would inherit the plot data and draw one point per row.
mean_brightness <- mean(brightness.without)
boxplot_plot <- boxplot_plot +
  annotate("point", x = 1, y = mean_brightness, color = "pink", shape = 5, size = 3) +
  annotate("text", x = 1.1, y = mean_brightness + 2, label = paste("Mean", round(mean_brightness, 2)), color = "pink")

# Convert the box plot into an interactive chart
interactive_boxplot_1 <- ggplotly(boxplot_plot)

# Show the interactive chart
interactive_boxplot_1

UScereal contains information on breakfast cereals.

Determine and interpret the relationships between the following pairs of variables using scatter plots, boxplots, or bar charts as appropriate.

manufacturer & shelf.

# Load the cereal data and count how often each `carbo` value occurs
data("UScereal")

table(UScereal[["carbo"]])

## 
## 10.52632       11       12     12.5       13     13.6       14 14.66667 
##        1        2        5        1        3        1        2        1 
##       15 15.15152 15.33333       16 16.41791       17 17.04545 17.33333 
##        2        1        1        4        1        1        1        2 
##     17.5 17.91045 18.66667 19.40299       20 20.35398 20.89552       21 
##        1        2        2        1        3        1        1        4 
## 21.21212 21.33333       22 22.38806       24 25.37313       26 26.66667 
##        1        1        2        2        2        2        1        1 
##       27       28 28.35821 29.85075       30 31.34328 39.39394       68 
##        1        2        1        1        1        1        1        1

# Grouped bar chart: number of cereals on each shelf, split by manufacturer.
# `shelf` is wrapped in factor() so each shelf is drawn as a discrete
# category, matching the shelf box plot elsewhere in this report.
Bar_shelfmfr <- ggplot(UScereal, aes(x = factor(shelf), fill = mfr)) +
  geom_bar(position = "dodge") +
  labs(title = "Número de cereales por fabricante y estantería", x = "Estante", y = "Número de cereales") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Show the static chart
Bar_shelfmfr

# Interactive version of the same chart
barras_interactivas <- ggplotly(Bar_shelfmfr)
barras_interactivas

En este gráfico podemos observar que el estante 3 es el que tiene más cereales y que, en este estante, 10 cereales son del fabricante (mfr) K.

# Box plots of fat, one box per level of `vitamins`
boxplot_vf <- ggplot(data = UScereal, mapping = aes(x = vitamins, y = fat)) +
  geom_boxplot() +
  theme_minimal() +
  ggtitle("Distribución de grasa por nivel de vitaminas") +
  xlab("Nivel de Vitaminas") +
  ylab("Grasa")

# Interactive version of the chart
interactive_boxplot2 <- ggplotly(p = boxplot_vf)
interactive_boxplot2

Los cereales con nivel de vitaminas enriched tienen mayor variabilidad de grasa en comparación con los otros niveles de vitaminas; su mediana está en 1.33 y hay un cereal con altos niveles de grasa, como muestra el valor atípico. Para los que no tienen nada de vitaminas, el nivel de grasa ni siquiera existe. Y los que tienen vitaminas al 100% tienen un nivel bajo de grasa, con poca variabilidad.

# Box plots of fat, one box per shelf (shelf treated as a category)
boxplot_fs <- ggplot(data = UScereal, mapping = aes(x = factor(shelf), y = fat)) +
  geom_boxplot() +
  theme_minimal() +
  ggtitle("Fat by Shelf") +
  xlab("Estante") +
  ylab("Grasa")

# Interactive version of the chart
interactive_boxplot3 <- ggplotly(p = boxplot_fs)
interactive_boxplot3

El estante 2 tiene valores atípicos, mientras que la grasa en los cereales del estante 3 es más variable que en el resto.

# Scatter plot of sugars against carbohydrates with a fitted linear trend.
# theme_minimal() must end with `+`: without it the theme() call that follows
# is evaluated as a separate statement and printed instead of being applied
# to the plot.
dispersion_1 <- ggplot(UScereal, aes(x = carbo, y = sugars)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(title = "Carbo and Sugars", x = "carbo", y = "azucares") +
  theme_minimal() +
  theme(axis.text.y = element_text(angle = 45, hjust = 1))

# Interactive version of the scatter plot
dispersion_interactive <- ggplotly(dispersion_1)

## `geom_smooth()` using formula = 'y ~ x'

dispersion_interactive

Esta gráfica podría indicar que los azúcares no dependen tanto de la cantidad de carbohidratos presentes, al menos en la muestra de datos utilizada, ya que aunque es una relación negativa, es muy pequeña la pendiente.

# Bar chart of fibre by manufacturer; geom_col() stacks the per-cereal values,
# so each bar's height is the manufacturer's total fibre.
dispersion_4 <- ggplot(UScereal, aes(x = mfr, y = fibre)) +
  geom_col(fill = "skyblue") +
  labs(title = "Total Fibre by Manufacturer", x = "Manufacturer (mfr)", y = "Total Fibre") +
  theme_minimal() +
  theme(axis.text.y = element_text(angle = 45, hjust = 1))

dispersion_4

# Interactive version of the fibre chart. The original converted
# Bar_shelfmfr here, which re-displayed the shelf/manufacturer bar chart
# instead of this one.
barras_interactivas1 <- ggplotly(dispersion_4)
barras_interactivas1

Con el gráfico de barras anterior podemos establecer que la mayor cantidad de fibra fue encontrada en los productos del fabricante K.

# Scatter plot of sugars against sodium with a fitted linear trend.
# theme_minimal() must end with `+`: without it the theme() call that follows
# is evaluated as a separate statement and printed instead of being applied
# to the plot.
dispersion_5 <- ggplot(UScereal, aes(x = sodium, y = sugars)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(title = "Sodium and Sugars", x = "sodium", y = "sugars") +
  theme_minimal() +
  theme(axis.text.y = element_text(angle = 45, hjust = 1))

dispersion_5

## `geom_smooth()` using formula = 'y ~ x'

# Interactive version of the scatter plot
interactive_diagram <- ggplotly(dispersion_5)

## `geom_smooth()` using formula = 'y ~ x'

interactive_diagram

Aunque la línea de regresión lineal muestra una relación positiva entre las variables, en esta muestra la mayoría de los cereales tiene bajo sodio y su azúcar es muy variado.

# Load the mammals data set
data("mammals")

# Scatter plot of brain against body with a fitted linear trend.
# theme_minimal() must end with `+`: without it the theme() call that follows
# is evaluated as a separate statement and printed instead of being applied
# to the plot.
dispersion_7 <- ggplot(mammals, aes(x = body, y = brain)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(title = "Relación peso corporal y tamaño del cerebro", x = "body", y = "brain") +
  theme_minimal() +
  theme(axis.text.y = element_text(angle = 45, hjust = 1))

dispersion_7

## `geom_smooth()` using formula = 'y ~ x'

# Interactive version of the scatter plot
interactive_diagram <- ggplotly(dispersion_7)

## `geom_smooth()` using formula = 'y ~ x'

interactive_diagram


La mayoría de los datos parecen estar concentrados cerca del origen (cerca de los valores bajos de peso corporal y peso cerebral), lo que podría estar indicando una distribución sesgada.



``` r
# Scatter plot of log(brain) against log(body) with a fitted linear trend
dispersion_8 <- ggplot(data = mammals, mapping = aes(x = log(body), y = log(brain))) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  theme_minimal() +
  theme(axis.text.y = element_text(angle = 45, hjust = 1)) +
  ggtitle("Relación Logaritmo Peso Corporal y Tamaño del Cerebro") +
  xlab("Log(Peso Corporal)") +
  ylab("Log(Tamaño del Cerebro)")

dispersion_8

## `geom_smooth()` using formula = 'y ~ x'

# Interactive version of the scatter plot
interactive_diagram <- ggplotly(p = dispersion_8)

## `geom_smooth()` using formula = 'y ~ x'

interactive_diagram

```

Según el diagrama, a medida que aumenta el peso corporal, aumenta el tamaño del cerebro; es decir, existe una relación positiva entre ambas variables, aproximadamente lineal en escala logarítmica.

Taller Clase 08

Natalia Carvajal

2024-08-08