##install.packages("MASS")
#install.packages("UsingR")
#install.packages("examplePackage")
#install.packages("ggplot2")
#install.packages("plotly")
library(dslabs)
library(viridisLite)
library(RColorBrewer)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(datasets)
library(ggplot2)
library(plotly)
##
## Adjuntando el paquete: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(MASS)
##
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
library(UsingR)
## Cargando paquete requerido: HistData
## Cargando paquete requerido: Hmisc
##
## Adjuntando el paquete: 'Hmisc'
## The following object is masked from 'package:plotly':
##
## subplot
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
# Wrap the `brightness` vector (not defined in this script; presumably the
# UsingR dataset attached above) in a data frame so ggplot2 can map it.
datos <- data.frame(brillo = brightness)

# A: density-scaled histogram of brightness with a density curve drawn on top.
ggplot(datos, aes(x = brillo)) +
  # after_stat(density) replaces the `..density..` dot-dot notation, which
  # ggplot2 deprecated in 3.4.0 and warns about.
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30, fill = "lightblue", color = "black"
  ) +
  geom_density(alpha = 0.2, fill = "red") +
  labs(
    title = "Histograma y Curva de Densidad del Brillo de Estrellas",
    x = "Brillo",
    y = "Densidad"
  ) +
  theme_minimal()

# B: boxplot of star brightness, followed by the sorted list of its outliers.
boxplot(
  brightness,
  col = "lightblue",
  ylab = "Brillo",
  main = "Boxplot del Brillo de Estrellas"
)

# The `out` component of boxplot.stats() holds the points beyond the whiskers.
stats <- boxplot.stats(brightness)[["out"]]
stats_sorted <- sort(stats)
print(stats_sorted)
## [1] 2.07 2.28 3.88 4.37 4.55 4.61 4.78 4.89 4.99 5.01 5.04 5.13
## [13] 5.24 5.29 5.41 5.42 5.53 5.54 5.55 11.28 11.55 11.55 11.55 11.63
## [25] 11.65 11.67 11.71 11.73 11.79 11.80 11.99 12.04 12.04 12.14 12.17 12.19
## [37] 12.31 12.43
## The second smallest outlier is 2.28
## C
# Keep only the observations that boxplot.stats() does not flag as outliers.
# The limits are the whisker ends (elements 1 and 5 of `$stats`) instead of
# hand-typed cut-offs, which can also drop non-outlying values lying between
# the whiskers and those cut-offs.
limites <- boxplot.stats(brightness)$stats[c(1, 5)]
limiteInf <- limites[1]
limiteSup <- limites[2]
brightness_without <- brightness[brightness >= limiteInf & brightness <= limiteSup]
length(brightness_without)
# NOTE(review): the earlier hard-coded limits (5.6, 11) printed 918 here;
# re-knit to refresh the count.
## D
# The mode is 8.55; the distribution is bell-shaped with a single peak, and it
# also has several outliers that can be seen in the boxplot.
# Store a data-frame copy of `UScereal` (not defined in this script; presumably
# the MASS dataset) in the workspace and preview its first six rows.
UScereal <- data.frame(UScereal)
head(UScereal, n = 6L)
## mfr calories protein fat sodium fibre
## 100% Bran N 212.1212 12.121212 3.030303 393.9394 30.303030
## All-Bran K 212.1212 12.121212 3.030303 787.8788 27.272727
## All-Bran with Extra Fiber K 100.0000 8.000000 0.000000 280.0000 28.000000
## Apple Cinnamon Cheerios G 146.6667 2.666667 2.666667 240.0000 2.000000
## Apple Jacks K 110.0000 2.000000 0.000000 125.0000 1.000000
## Basic 4 G 173.3333 4.000000 2.666667 280.0000 2.666667
## carbo sugars shelf potassium vitamins
## 100% Bran 15.15152 18.18182 3 848.48485 enriched
## All-Bran 21.21212 15.15151 3 969.69697 enriched
## All-Bran with Extra Fiber 16.00000 0.00000 3 660.00000 enriched
## Apple Cinnamon Cheerios 14.00000 13.33333 1 93.33333 enriched
## Apple Jacks 11.00000 14.00000 2 30.00000 enriched
## Basic 4 24.00000 10.66667 3 133.33333 enriched
## A
## manufacturer & shelf.
# Keep only manufacturer and shelf. `dplyr::` is spelled out because MASS,
# attached later, masks dplyr's select().
UScerealMS <- dplyr::select(UScereal, mfr, shelf)
# Shelf is stored as an integer; turn it into a factor so it is categorical.
UScerealMS$shelf <- factor(UScerealMS$shelf)
dato_shelf <- levels(UScerealMS$shelf)
dato_shelf
## [1] "1" "2" "3"
# Grouped bar chart: number of cereals per manufacturer, split by shelf, with
# the count printed above each bar. `shelf` is already a factor here, so it is
# mapped to fill directly.
ggplot(UScerealMS, aes(x = mfr, fill = shelf)) +
  geom_bar(position = "dodge") + # "dodge" draws the bars side by side
  # after_stat(count) replaces the `..count..` dot-dot notation deprecated in
  # ggplot2 3.4.0.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    position = position_dodge(width = 0.9),
    vjust = -0.5
  ) +
  labs(
    title = "Fabricantes por Estantes",
    x = "Fabricante",
    y = "Cantidad",
    fill = "Estantes"
  ) +
  theme_minimal()

## fat & vitamins.
# Two-column subset (fat, vitamins) of the cereal data, printed in full.
UScerealFV <- dplyr::select(.data = UScereal, c(fat, vitamins))
UScerealFV
## fat vitamins
## 100% Bran 3.0303030 enriched
## All-Bran 3.0303030 enriched
## All-Bran with Extra Fiber 0.0000000 enriched
## Apple Cinnamon Cheerios 2.6666667 enriched
## Apple Jacks 0.0000000 enriched
## Basic 4 2.6666667 enriched
## Bran Chex 1.4925373 enriched
## Bran Flakes 0.0000000 enriched
## Cap'n'Crunch 2.6666667 enriched
## Cheerios 1.6000000 enriched
## Cinnamon Toast Crunch 4.0000000 enriched
## Clusters 4.0000000 enriched
## Cocoa Puffs 1.0000000 enriched
## Corn Chex 0.0000000 enriched
## Corn Flakes 0.0000000 enriched
## Corn Pops 0.0000000 enriched
## Count Chocula 1.0000000 enriched
## Cracklin' Oat Bran 6.0000000 enriched
## Crispix 0.0000000 enriched
## Crispy Wheat & Raisins 1.3333333 enriched
## Double Chex 0.0000000 enriched
## Froot Loops 1.0000000 enriched
## Frosted Flakes 0.0000000 enriched
## Frosted Mini-Wheats 0.0000000 enriched
## Fruit & Fibre: Dates Walnuts and Oats 2.9850746 enriched
## Fruitful Bran 0.0000000 enriched
## Fruity Pebbles 1.3333333 enriched
## Golden Crisp 0.0000000 enriched
## Golden Grahams 1.3333333 enriched
## Grape Nuts Flakes 1.1363636 enriched
## Grape-Nuts 0.0000000 enriched
## Great Grains Pecan 9.0909091 enriched
## Honey Graham Ohs 2.0000000 enriched
## Honey Nut Cheerios 1.3333333 enriched
## Honey-comb 0.0000000 enriched
## Just Right Fruit & Nut 1.3333333 100%
## Kix 0.6666667 enriched
## Life 2.9850746 enriched
## Lucky Charms 1.0000000 enriched
## Mueslix Crispy Blend 2.9850746 enriched
## Multi-Grain Cheerios 1.0000000 enriched
## Nut&Honey Crunch 1.4925373 enriched
## Nutri-Grain Almond-Raisin 2.9850746 enriched
## Oatmeal Raisin Crisp 4.0000000 enriched
## Post Nat. Raisin Bran 1.4925373 enriched
## Product 19 0.0000000 100%
## Puffed Rice 0.0000000 none
## Quaker Oat Squares 2.0000000 enriched
## Raisin Bran 1.3333333 enriched
## Raisin Nut Bran 4.0000000 enriched
## Raisin Squares 0.0000000 enriched
## Rice Chex 0.0000000 enriched
## Rice Krispies 0.0000000 enriched
## Shredded Wheat 'n'Bran 0.0000000 none
## Shredded Wheat spoon size 0.0000000 none
## Smacks 1.3333333 enriched
## Special K 0.0000000 enriched
## Total Corn Flakes 1.0000000 100%
## Total Raisin Bran 1.0000000 100%
## Total Whole Grain 1.0000000 100%
## Triples 1.3333333 enriched
## Trix 1.0000000 enriched
## Wheat Chex 1.4925373 enriched
## Wheaties 1.0000000 enriched
## Wheaties Honey Gold 1.3333333 enriched
# Bar chart of the mean fat content for each vitamins level. The legend is
# hidden because the x axis already names the levels.
ggplot(data = UScerealFV, mapping = aes(x = vitamins, y = fat, fill = vitamins)) +
  stat_summary(geom = "bar", fun = mean) +
  labs(
    y = "Media de Grasa (g)",
    x = "Nivel de Vitaminas",
    title = "Media de Grasa por Vitaminas"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# Histograms of fat (bin width 1), one panel per vitamins level, each panel
# with its own axis scales.
ggplot(data = UScerealFV, mapping = aes(x = fat, fill = vitamins)) +
  geom_histogram(alpha = 0.7, colour = "black", binwidth = 1) +
  facet_wrap(vars(vitamins), scales = "free") +
  labs(
    y = "Frecuencia",
    x = "Grasa (g)",
    title = "Histograma de Grasa por Vitaminas"
  ) +
  theme_minimal()

## fat & shelf.
# Two-column subset (fat, shelf) of the cereal data, printed in full.
UScerealFS <- dplyr::select(.data = UScereal, c(fat, shelf))
UScerealFS
## fat shelf
## 100% Bran 3.0303030 3
## All-Bran 3.0303030 3
## All-Bran with Extra Fiber 0.0000000 3
## Apple Cinnamon Cheerios 2.6666667 1
## Apple Jacks 0.0000000 2
## Basic 4 2.6666667 3
## Bran Chex 1.4925373 1
## Bran Flakes 0.0000000 3
## Cap'n'Crunch 2.6666667 2
## Cheerios 1.6000000 1
## Cinnamon Toast Crunch 4.0000000 2
## Clusters 4.0000000 3
## Cocoa Puffs 1.0000000 2
## Corn Chex 0.0000000 1
## Corn Flakes 0.0000000 1
## Corn Pops 0.0000000 2
## Count Chocula 1.0000000 2
## Cracklin' Oat Bran 6.0000000 3
## Crispix 0.0000000 3
## Crispy Wheat & Raisins 1.3333333 3
## Double Chex 0.0000000 3
## Froot Loops 1.0000000 2
## Frosted Flakes 0.0000000 1
## Frosted Mini-Wheats 0.0000000 2
## Fruit & Fibre: Dates Walnuts and Oats 2.9850746 3
## Fruitful Bran 0.0000000 3
## Fruity Pebbles 1.3333333 2
## Golden Crisp 0.0000000 1
## Golden Grahams 1.3333333 2
## Grape Nuts Flakes 1.1363636 3
## Grape-Nuts 0.0000000 3
## Great Grains Pecan 9.0909091 3
## Honey Graham Ohs 2.0000000 2
## Honey Nut Cheerios 1.3333333 1
## Honey-comb 0.0000000 1
## Just Right Fruit & Nut 1.3333333 3
## Kix 0.6666667 2
## Life 2.9850746 2
## Lucky Charms 1.0000000 2
## Mueslix Crispy Blend 2.9850746 3
## Multi-Grain Cheerios 1.0000000 1
## Nut&Honey Crunch 1.4925373 2
## Nutri-Grain Almond-Raisin 2.9850746 3
## Oatmeal Raisin Crisp 4.0000000 3
## Post Nat. Raisin Bran 1.4925373 3
## Product 19 0.0000000 3
## Puffed Rice 0.0000000 3
## Quaker Oat Squares 2.0000000 3
## Raisin Bran 1.3333333 2
## Raisin Nut Bran 4.0000000 3
## Raisin Squares 0.0000000 3
## Rice Chex 0.0000000 1
## Rice Krispies 0.0000000 1
## Shredded Wheat 'n'Bran 0.0000000 1
## Shredded Wheat spoon size 0.0000000 1
## Smacks 1.3333333 2
## Special K 0.0000000 1
## Total Corn Flakes 1.0000000 3
## Total Raisin Bran 1.0000000 3
## Total Whole Grain 1.0000000 3
## Triples 1.3333333 3
## Trix 1.0000000 2
## Wheat Chex 1.4925373 1
## Wheaties 1.0000000 1
## Wheaties Honey Gold 1.3333333 1
# Inspect the column types: shelf is an integer, so the plots below wrap it in
# factor() to treat it as a category.
str(object = UScerealFS)
## 'data.frame': 65 obs. of 2 variables:
## $ fat : num 3.03 3.03 0 2.67 0 ...
## $ shelf: int 3 3 3 1 2 3 1 3 2 1 ...
# Bar chart of the mean fat content per shelf (legend hidden).
ggplot(
  data = UScerealFS,
  mapping = aes(x = factor(shelf), y = fat, fill = factor(shelf))
) +
  stat_summary(geom = "bar", fun = mean) +
  labs(
    y = "Media de Grasa (g)",
    x = "shelf",
    title = "Media de Grasa por shelf"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# Histograms of fat (bin width 1), one panel per shelf, each panel with its
# own axis scales.
ggplot(data = UScerealFS, mapping = aes(x = fat, fill = factor(shelf))) +
  geom_histogram(alpha = 0.7, colour = "black", binwidth = 1) +
  facet_wrap(vars(shelf), scales = "free") +
  labs(
    y = "Frecuencia",
    x = "Grasa (g)",
    title = "Histograma de Grasa por shelf"
  ) +
  theme_minimal()

## carbohydrates & sugars
# Two-column subset (carbo, sugars) of the cereal data, printed in full.
UScerealCS <- dplyr::select(.data = UScereal, c(carbo, sugars))
UScerealCS
## carbo sugars
## 100% Bran 15.15152 18.181818
## All-Bran 21.21212 15.151515
## All-Bran with Extra Fiber 16.00000 0.000000
## Apple Cinnamon Cheerios 14.00000 13.333333
## Apple Jacks 11.00000 14.000000
## Basic 4 24.00000 10.666667
## Bran Chex 22.38806 8.955224
## Bran Flakes 19.40299 7.462687
## Cap'n'Crunch 16.00000 16.000000
## Cheerios 13.60000 0.800000
## Cinnamon Toast Crunch 17.33333 12.000000
## Clusters 26.00000 14.000000
## Cocoa Puffs 12.00000 13.000000
## Corn Chex 22.00000 3.000000
## Corn Flakes 21.00000 2.000000
## Corn Pops 13.00000 12.000000
## Count Chocula 12.00000 13.000000
## Cracklin' Oat Bran 20.00000 14.000000
## Crispix 21.00000 3.000000
## Crispy Wheat & Raisins 14.66667 13.333333
## Double Chex 24.00000 6.666667
## Froot Loops 11.00000 13.000000
## Frosted Flakes 18.66667 14.666667
## Frosted Mini-Wheats 17.50000 8.750000
## Fruit & Fibre: Dates Walnuts and Oats 17.91045 14.925373
## Fruitful Bran 20.89552 17.910448
## Fruity Pebbles 17.33333 16.000000
## Golden Crisp 12.50000 17.045455
## Golden Grahams 20.00000 12.000000
## Grape Nuts Flakes 17.04545 5.681818
## Grape-Nuts 68.00000 12.000000
## Great Grains Pecan 39.39394 12.121212
## Honey Graham Ohs 12.00000 11.000000
## Honey Nut Cheerios 15.33333 13.333333
## Honey-comb 10.52632 8.270677
## Just Right Fruit & Nut 26.66667 12.000000
## Kix 14.00000 2.000000
## Life 17.91045 8.955224
## Lucky Charms 12.00000 12.000000
## Mueslix Crispy Blend 25.37313 19.402985
## Multi-Grain Cheerios 15.00000 6.000000
## Nut&Honey Crunch 22.38806 13.432836
## Nutri-Grain Almond-Raisin 31.34328 10.447761
## Oatmeal Raisin Crisp 27.00000 20.000000
## Post Nat. Raisin Bran 16.41791 20.895522
## Product 19 20.00000 3.000000
## Puffed Rice 13.00000 0.000000
## Quaker Oat Squares 28.00000 12.000000
## Raisin Bran 18.66667 16.000000
## Raisin Nut Bran 21.00000 16.000000
## Raisin Squares 30.00000 12.000000
## Rice Chex 20.35398 1.769912
## Rice Krispies 22.00000 3.000000
## Shredded Wheat 'n'Bran 28.35821 0.000000
## Shredded Wheat spoon size 29.85075 0.000000
## Smacks 12.00000 20.000000
## Special K 16.00000 3.000000
## Total Corn Flakes 21.00000 3.000000
## Total Raisin Bran 15.00000 14.000000
## Total Whole Grain 16.00000 3.000000
## Triples 28.00000 4.000000
## Trix 13.00000 12.000000
## Wheat Chex 25.37313 4.477612
## Wheaties 17.00000 3.000000
## Wheaties Honey Gold 21.33333 10.666667
# Inspect the column types (both numeric).
str(object = UScerealCS)
## 'data.frame': 65 obs. of 2 variables:
## $ carbo : num 15.2 21.2 16 14 11 ...
## $ sugars: num 18.2 15.2 0 13.3 14 ...
# Scatter plot of sugars against carbohydrates, with one panel for each of
# four carbohydrate ranges produced by cut().
ggplot(data = UScerealCS, mapping = aes(x = carbo, y = sugars)) +
  geom_point(colour = "#69b3a2") +
  facet_wrap(~ cut(carbo, breaks = 4)) +
  labs(
    y = "Azucares (g)",
    x = "Carbohidratos (g)",
    title = "Relacion entre Carbohidratos y Azucares por Rangos de Carbohidratos"
  ) +
  theme_minimal()

## fibre & manufacturer
# Two-column subset (fibre, mfr) of the cereal data, printed in full.
UScerealFM <- dplyr::select(.data = UScereal, c(fibre, mfr))
UScerealFM
## fibre mfr
## 100% Bran 30.303030 N
## All-Bran 27.272727 K
## All-Bran with Extra Fiber 28.000000 K
## Apple Cinnamon Cheerios 2.000000 G
## Apple Jacks 1.000000 K
## Basic 4 2.666667 G
## Bran Chex 5.970149 R
## Bran Flakes 7.462687 P
## Cap'n'Crunch 0.000000 Q
## Cheerios 1.600000 G
## Cinnamon Toast Crunch 0.000000 G
## Clusters 4.000000 G
## Cocoa Puffs 0.000000 G
## Corn Chex 0.000000 R
## Corn Flakes 1.000000 K
## Corn Pops 1.000000 K
## Count Chocula 0.000000 G
## Cracklin' Oat Bran 8.000000 K
## Crispix 1.000000 K
## Crispy Wheat & Raisins 2.666667 G
## Double Chex 1.333333 R
## Froot Loops 1.000000 K
## Frosted Flakes 1.333333 K
## Frosted Mini-Wheats 3.750000 K
## Fruit & Fibre: Dates Walnuts and Oats 7.462687 P
## Fruitful Bran 7.462687 K
## Fruity Pebbles 0.000000 P
## Golden Crisp 0.000000 P
## Golden Grahams 0.000000 G
## Grape Nuts Flakes 3.409091 P
## Grape-Nuts 12.000000 P
## Great Grains Pecan 9.090909 P
## Honey Graham Ohs 1.000000 Q
## Honey Nut Cheerios 2.000000 G
## Honey-comb 0.000000 P
## Just Right Fruit & Nut 2.666667 K
## Kix 0.000000 G
## Life 2.985075 Q
## Lucky Charms 0.000000 G
## Mueslix Crispy Blend 4.477612 K
## Multi-Grain Cheerios 2.000000 G
## Nut&Honey Crunch 0.000000 K
## Nutri-Grain Almond-Raisin 4.477612 K
## Oatmeal Raisin Crisp 3.000000 G
## Post Nat. Raisin Bran 8.955224 P
## Product 19 1.000000 K
## Puffed Rice 0.000000 Q
## Quaker Oat Squares 4.000000 Q
## Raisin Bran 6.666667 K
## Raisin Nut Bran 5.000000 G
## Raisin Squares 4.000000 K
## Rice Chex 0.000000 R
## Rice Krispies 0.000000 K
## Shredded Wheat 'n'Bran 5.970149 N
## Shredded Wheat spoon size 4.477612 N
## Smacks 1.333333 K
## Special K 1.000000 K
## Total Corn Flakes 0.000000 G
## Total Raisin Bran 4.000000 G
## Total Whole Grain 3.000000 G
## Triples 0.000000 G
## Trix 0.000000 G
## Wheat Chex 4.477612 R
## Wheaties 3.000000 G
## Wheaties Honey Gold 1.333333 G
# Inspect the column types of the fibre/manufacturer subset.
str(object = UScerealFM)
## 'data.frame': 65 obs. of 2 variables:
## $ fibre: num 30.3 27.3 28 2 1 ...
## $ mfr : Factor w/ 6 levels "G","K","N","P",..: 3 2 2 1 2 1 6 4 5 1 ...
#ggplot(UScerealFM, aes(x = sodium)) +
# geom_histogram(binwidth = 50, fill = "#69b3a2", color = "black") +
#labs(title = "Distribucion de Sodio en Cereales",
# x = "Sodio (mg)",
# y = "Frecuencia") +
# theme_minimal()
##sodium & sugars
#UScerealSS <- dplyr::select(UScereal, sodium, sugars)
#UScerealSS
#ggplot(UScerealSS, aes(x = sodium)) +
# geom_histogram(binwidth = 50, fill = "#69b3a2", color = "black") +
#labs(title = "Distribucion de Sodio en Cereales",
# x = "Sodio (mg)",
# y = "Frecuencia") +
# theme_minimal()
## 4
# A
# Store a data-frame copy of `mammals` (not defined in this script; it comes
# from one of the attached packages) and preview its first six rows.
mammals <- data.frame(mammals)
head(mammals, n = 6L)
## body brain
## Arctic fox 3.385 44.5
## Owl monkey 0.480 15.5
## Mountain beaver 1.350 8.1
## Cow 465.000 423.0
## Grey wolf 36.330 119.5
## Goat 27.660 115.0
# Scatter plot of brain weight against body weight on the raw scale.
ggplot(mammals, aes(x = body, y = brain)) +
  geom_point(color = "#69b3a2", size = 3) +
  labs(
    # Title typo fixed: "cerecro" -> "cerebro".
    title = "Relacion entre cuerpo y cerebro",
    x = "peso cuerpo (kg)",
    y = "peso cerebro (g)"
  ) +
  theme_minimal()

# B
# Linear correlation between body and brain weight (cor() with its default
# method), printed with a label.
correlacion <- with(mammals, cor(body, brain))
print(paste("Correlación lineal: ", correlacion))
## [1] "Correlación lineal: 0.934163842323355"
# C
# Add natural-log versions of both weights as new columns.
mammals[["log_body"]] <- log(mammals[["body"]])
mammals[["log_brain"]] <- log(mammals[["brain"]])
# Same scatter plot as before, now on the log-log scale.
ggplot(data = mammals, mapping = aes(x = log_body, y = log_brain)) +
  geom_point(size = 3, colour = "#1f78b4") +
  labs(
    y = "Log(Peso Cerebral) (g)",
    x = "Log(Peso Corporal) (kg)",
    title = "Relacion Logaritmica entre Peso Corporal y Peso Cerebral"
  ) +
  theme_minimal()

# Correlation of the log-transformed weights, printed as a bare string.
correlacionLog <- with(mammals, cor(log_body, log_brain))
print(paste(correlacionLog))
## [1] "0.95957475837098"
## In logarithmic form the relationship between the two variables is easier to see at a glance
## 5
# Store a data-frame copy of `anorexia` (not defined in this script; presumably
# the MASS dataset) and preview its first six rows.
anorexia <- data.frame(anorexia)
head(anorexia, n = 6L)
## Treat Prewt Postwt
## 1 Cont 80.7 80.2
## 2 Cont 89.4 80.1
## 3 Cont 91.8 86.4
## 4 Cont 74.0 86.3
## 5 Cont 78.1 76.1
## 6 Cont 88.3 78.1
## A
# Weight change per patient: post-treatment weight minus pre-treatment weight.
anorexia$WeightChange <- anorexia$Postwt - anorexia$Prewt
# Three-way label. A change of exactly 0 used to fall through to
# "Perdio Peso"; it now gets its own "Sin Cambio" category.
anorexia$WeightStatus <- ifelse(
  anorexia$WeightChange > 0,
  "Gano Peso",
  ifelse(anorexia$WeightChange < 0, "Perdio Peso", "Sin Cambio")
)
# Grouped bar chart: number of patients in each weight-status category,
# side by side for every treatment.
ggplot(anorexia, aes(x = Treat, fill = WeightStatus)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Numero de Pacientes que Ganaron o Perdieron Peso por Tratamiento",
    x = "Tratamiento",
    y = "Numero de Pacientes"
  ) +
  # Named colours; "Sin Cambio" only matters when that category occurs.
  scale_fill_manual(
    values = c(
      "Gano Peso" = "#69b3a2",
      "Perdio Peso" = "#ff6961",
      "Sin Cambio" = "#bdbdbd"
    )
  ) +
  theme_minimal()
