Datos individuales para la asignación; no usar estos
library(readxl)
# Load the "fooddata" worksheet of the workbook into `datafood`.
datafood <- read_excel(
  path = "datafood.xlsx",
  sheet = "fooddata"
)
# Preview the first six rows.
head(datafood, n = 6L)
## # A tibble: 6 x 9
## name type cal serving fat chol sodium carbs protein
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 pollo can 45 56 1 35 260 0 9
## 2 cornbeef can 180 84 11 65 800 1 20
## 3 maiz can 80 125 1 0 310 17 2
## 4 salchicha can 73 57 6 35 460 0 5
## 5 atun can 70 7 0.5 35 320 0 16
## 6 remolacha can 45 126 0 NA 240 9 1
Su respuesta.
Su respuesta.
# Build a data.table version of the data for the grouped summaries
library(data.table)
datafoodDT <- as.data.table(datafood)
# Standard error of the mean: sd(x) / sqrt(n).
#
# x:     numeric vector.
# na.rm: if TRUE, missing values are dropped before computing both the
#        standard deviation and the sample size. The default (FALSE)
#        keeps the original behaviour, where any NA in x yields NA.
# Returns a single numeric value.
errest <- function(x, na.rm = FALSE) {
  if (na.rm) {
    # Drop NAs up front so that n counts only the observed values.
    x <- x[!is.na(x)]
  }
  sd(x) / sqrt(length(x))
}
# Descriptive statistics of calories (cal) for each food type:
# median, maximum, minimum, mean, standard deviation and standard error.
# The accented column names are restored from mis-decoded text.
descri.cal <- datafoodDT[,
  list(
    Mediana = median(cal),
    Máximo = max(cal),
    Mínimo = min(cal),
    Media = mean(cal),
    Desv.Est. = sd(cal),
    Err.Est. = errest(cal)
  ),
  by = list(Tipo = type)
]
descri.cal
## Tipo Mediana Máximo Mínimo Media Desv.Est. Err.Est.
## 1: can 71.5 180 20 76.8 45.48699 14.38425
## 2: liq 75.0 130 0 66.5 43.59218 13.78506
## 3: dry 145.0 200 110 151.4 35.98827 11.38049
library(ggplot2)
# Histogram of calories on the density scale, with a normal curve overlaid.
# The curve uses the sample mean and standard deviation of cal.
# NOTE(review): ggplot2 >= 3.4 deprecates `..density..` and `size` for lines
# in favour of after_stat(density) and linewidth. They are kept here because
# this script appears to have been run with an older ggplot2 -- confirm
# before modernising.
hist.cal <- ggplot(datafood, aes(cal)) +
  geom_histogram(
    aes(y = ..density..),
    bins = 12,
    colour = "white",
    fill = "green"
  ) +
  labs(x = "Calorías", y = "Density") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(datafood$cal, na.rm = TRUE),
      sd = sd(datafood$cal, na.rm = TRUE)
    ),
    colour = "red",
    size = 1
  )
hist.cal
# Shapiro-Wilk normality test on the calorie values.
shapiro.test(datafood$cal)
##
## Shapiro-Wilk normality test
##
## data: datafood$cal
## W = 0.96487, p-value = 0.4099
# Scatter plot of calories against protein, points coloured by food type.
ggplot(data = datafood, mapping = aes(x = protein, y = cal)) +
  geom_point(mapping = aes(colour = type))
## Warning: Removed 1 rows containing missing values (geom_point).
# Simple linear regression of calories on protein.
# na.exclude leaves incomplete rows out of the fit but pads residuals and
# fitted values with NA, so they stay aligned with the rows of datafood.
fit1 <- lm(
  formula = cal ~ protein,
  data = datafood,
  na.action = na.exclude
)
summary(object = fit1)
##
## Call:
## lm(formula = cal ~ protein, data = datafood, na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -83.297 -34.698 -4.231 40.302 90.862
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 74.792 13.039 5.736 4.25e-06 ***
## protein 4.907 2.007 2.445 0.0213 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.52 on 27 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1812, Adjusted R-squared: 0.1509
## F-statistic: 5.977 on 1 and 27 DF, p-value: 0.0213
# Calories against protein with the fitted regression line (dashed red)
# and its confidence band; points are coloured by food type.
# The accented axis labels are restored from mis-decoded text.
ggplot(data = datafood, aes(x = protein, y = cal)) +
  geom_point(aes(color = type), size = 2) +
  geom_smooth(method = "lm", color = "red", linetype = 2) +
  labs(x = "Proteína, g", y = "Calorías")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
# Residuals of fit1 against protein.
# fit1 was fitted with na.action = na.exclude, so residuals(fit1) has one
# value per row of datafood (NA for the incomplete row) and lines up with
# datafood$protein.
plot(
  datafood$protein,
  residuals(fit1),
  xlab = "Proteína, g",
  ylab = "residuales"
)
# Draw the zero reference line after the plot exists. As an argument to
# plot() it would be matched positionally instead of being drawn on top.
abline(h = 0)
library(ggplot2)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 2.1.3 ✔ purrr 0.3.3
## ✔ tidyr 1.0.0 ✔ dplyr 0.8.3
## ✔ readr 1.3.1 ✔ stringr 1.4.0
## ✔ tibble 2.1.3 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
library(dplyr)
# Same model fitted without an explicit na.action argument.
# The summary below still reports one observation deleted for missingness.
fit2 <- lm(
  formula = cal ~ protein,
  data = datafood
)
summary(object = fit2)
##
## Call:
## lm(formula = cal ~ protein, data = datafood)
##
## Residuals:
## Min 1Q Median 3Q Max
## -83.297 -34.698 -4.231 40.302 90.862
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 74.792 13.039 5.736 4.25e-06 ***
## protein 4.907 2.007 2.445 0.0213 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.52 on 27 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1812, Adjusted R-squared: 0.1509
## F-statistic: 5.977 on 1 and 27 DF, p-value: 0.0213
# Residuals-versus-fitted plot for fit2.
# No na.omit() step is needed: the default na.omit method returns
# non-atomic objects such as an lm fit unchanged, and the incomplete row was
# already dropped when the model was fitted.
# ggplot() receives the lm object directly and maps its .fitted and .resid
# columns.
fit2 %>%
  ggplot(aes(x = .fitted, y = .resid)) +
  geom_point() +
  geom_hline(yintercept = 0, colour = "green", linetype = "dashed") +
  labs(x = "Valores estimados", y = "Residuales") +
  theme(
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 10, face = "bold")
  )