Modelos Estadísticos. Grado Biotecnología
Abstract
Documento para la resolución de la tarea 2 (Práctica 5).
Cargamos las librerías y los datos que vamos a utilizar.
library(tidyverse)
library(stringr)
library(forcats)
library(lubridate)
library(magrittr)
library(broom)
library(datasets)
Para este conjunto de datos debes realizar las siguientes operaciones de procesado original:
# Print the raw data set and inspect its structure.
airquality
str(airquality)
# Convert Month to a factor and relabel its five levels with Spanish names.
airquality$Month <- as.factor(airquality$Month)
levels(airquality$Month) <- c("Mayo", "Junio", "Julio", "Agosto", "Septiembre")
# Build the Day factor from airquality itself: airquality.new is only created
# further down the script, so reading from it here fails with
# "object 'airquality.new' not found".
airquality$Day <- as.factor(airquality$Day)
levels(airquality$Day) <- as.character(1:31)
No olvides eliminar los valores perdidos correspondientes a cada análisis.
Factores: Month, Day
# Month: keep only this factor and drop rows with missing values.
airquality.new <- select(airquality, Month)
airquality.new <- na.omit(airquality.new)
# Frequency table: number of observations per level of Month.
tabla_tipo <- airquality.new %>%
  group_by(Month) %>%
  summarise(n = n())
# Add each level's share of the total as a percentage (two decimals).
mutate(tabla_tipo, percent = round(100 * n / sum(n), 2))
# Horizontal bar chart of the proportion of observations per month.
# This is a separate statement from the mutate() call above; having both on
# one line is a syntax error.
ggplot(airquality.new, aes(x = Month)) +
  geom_bar(aes(y = ..prop.., group = 1)) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  xlab("Mes") + ylab("Porcentaje") +
  theme_bw()
# Remove missing values
# Day: keep only this factor and drop rows with missing values.
airquality.new <- select(airquality, Day)
airquality.new <- na.omit(airquality.new)
# Frequency table: group by the factor and count the cases in each level.
tabla_tipo <- airquality.new %>%
  group_by(Day) %>%
  summarise(n = n())
# Add each level's share of the total as a percentage (two decimals).
mutate(tabla_tipo, percent = round(100 * n / sum(n), 2))
# Horizontal bar chart of the proportion of observations per day.
# This is a separate statement from the mutate() call above; having both on
# one line is a syntax error.
ggplot(airquality.new, aes(x = Day)) +
  geom_bar(aes(y = ..prop.., group = 1)) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  xlab("Dia") + ylab("Porcentaje") +
  theme_bw()
Numéricas: Ozone, Solar.R, Wind, Temp
Ozone
# Ozone: keep the variable and drop rows with missing values.
airquality.new <- select(airquality, Ozone)
airquality.new <- na.omit(airquality.new)
# Mean and standard deviation. Plain summarise() replaces the deprecated
# summarise_each()/funs() pair; the result columns are still `mean` and `sd`.
airquality.new %>%
  summarise(mean = mean(Ozone), sd = sd(Ozone))
# Histogram of the distribution.
ggplot(airquality.new, aes(x = Ozone)) +
  geom_histogram()
Solar.R
# Solar.R: keep the variable and drop rows with missing values.
airquality.new <- select(airquality, Solar.R)
airquality.new <- na.omit(airquality.new)
# Mean and standard deviation. Plain summarise() replaces the deprecated
# summarise_each()/funs() pair; the result columns are still `mean` and `sd`.
airquality.new %>%
  summarise(mean = mean(Solar.R), sd = sd(Solar.R))
# Histogram of the distribution.
ggplot(airquality.new, aes(x = Solar.R)) +
  geom_histogram()
Wind
# Wind: keep the variable and drop rows with missing values.
airquality.new <- select(airquality, Wind)
airquality.new <- na.omit(airquality.new)
# Mean and standard deviation. Plain summarise() replaces the deprecated
# summarise_each()/funs() pair; the result columns are still `mean` and `sd`.
airquality.new %>%
  summarise(mean = mean(Wind), sd = sd(Wind))
# Histogram of the distribution.
ggplot(airquality.new, aes(x = Wind)) +
  geom_histogram()
Temp
# Temp: keep the variable and drop rows with missing values.
airquality.new <- select(airquality, Temp)
airquality.new <- na.omit(airquality.new)
# Mean and standard deviation. Plain summarise() replaces the deprecated
# summarise_each()/funs() pair; the result columns are still `mean` and `sd`.
airquality.new %>%
  summarise(mean = mean(Temp), sd = sd(Temp))
# Histogram of the distribution.
ggplot(airquality.new, aes(x = Temp)) +
  geom_histogram()
Ozone y Solar.R
# Ozone vs Solar.R: keep both variables and drop rows where either is missing.
airquality.new <- select(airquality, Ozone, Solar.R)
airquality.new <- na.omit(airquality.new)
# Correlation between the two variables.
cor(airquality.new$Ozone, airquality.new$Solar.R)
# Scatter plot of ozone against solar radiation.
ggplot(airquality.new, aes(x = Solar.R, y = Ozone)) +
  geom_point() +
  labs(x = "Radiación solar", y = "Ozono")
Month y Day
# Month x Day: keep both factors and drop rows with missing values.
airquality.new <- select(airquality, Month, Day)
airquality.new <- na.omit(airquality.new)
# Count each Month/Day combination and express it as a percentage of the
# overall total (two decimals).
tabla_dbl <- airquality.new %>%
  count(Month, Day) %>%
  mutate(porcentaje = round(100 * n / sum(n), 2))
tabla_dbl
# Counts, one column per month.
tabla_conteos <- select(tabla_dbl, Month, Day, n)
spread(tabla_conteos, key = Month, value = n)
# Percentages, one column per month.
tabla_porcentajes <- select(tabla_dbl, Month, Day, porcentaje)
spread(tabla_porcentajes, key = Month, value = porcentaje)
# Stacked bar chart. Day is on the x axis and Month is the fill, so the labels
# must read "Día" for x and "Mes" for the legend (they were swapped). The plot
# uses the filtered data built above, like the tables.
ggplot(airquality.new, aes(x = Day, fill = Month)) +
  geom_bar() +
  labs(x = "Día", y = "Número de observaciones", fill = "Mes") +
  theme_bw()
Ozone y Month
# Ozone by Month: keep both variables and drop rows with missing values.
airquality.new <- select(airquality, Ozone, Month)
airquality.new <- na.omit(airquality.new)
# Table: mean and standard deviation of Ozone per month. Plain summarise()
# replaces the deprecated summarise_each()/funs() pair; the result columns are
# still `mean` and `sd`.
airquality.new %>%
  group_by(Month) %>%
  summarise(mean = mean(Ozone), sd = sd(Ozone))
# Plot: box plot per month, drawn from the filtered data so it matches the
# table and no rows with missing Ozone reach ggplot.
ggplot(airquality.new, aes(x = Month, y = Ozone)) +
  geom_boxplot() +
  labs(x = "Mes", y = "Ozono")
Solar.R y Day
# Solar.R by Day: keep both variables and drop rows with missing values.
airquality.new <- select(airquality, Solar.R, Day)
airquality.new <- na.omit(airquality.new)
# Table: mean and standard deviation of Solar.R per day. Plain summarise()
# replaces the deprecated summarise_each()/funs() pair; the result columns are
# still `mean` and `sd`.
airquality.new %>%
  group_by(Day) %>%
  summarise(mean = mean(Solar.R), sd = sd(Solar.R))
# Plot: box plot per day (axis label typo "Radiaciòn" corrected).
ggplot(airquality.new, aes(x = Day, y = Solar.R)) +
  geom_boxplot() +
  labs(x = "Día", y = "Radiación Solar")
# Read the ncbirth data set. col_types gives one type code per column
# (d = double, c = character). This is its own statement; it must not share a
# line with the plot above.
ncbirth <- read_csv("https://goo.gl/mB9Jcn", col_types = "dcddcccdccddcc")
str(ncbirth)
# Recode the categorical columns. fct_recode() takes pairs of the form
# "new label" = "old level".
ncbirth <- ncbirth %>%
  mutate(
    sex = fct_recode(sex, "male" = "1", "female" = "2"),
    marital = fct_recode(marital, "married" = "1", "not married" = "2"),
    racemom = fct_recode(
      racemom,
      "other non white" = "0", "White" = "1", "Black" = "2",
      "America indian" = "3", "Chinese" = "4", "Hawaiian" = "5",
      "Filipino" = "6", "Other asian" = "7", "Other" = "8"
    ),
    hispmom = fct_recode(
      hispmom,
      "Cuban" = "C", "Mexican" = "M", "Non-Hispanic" = "N", "Other" = "O",
      "Puerto Rican" = "P", "Central/South american" = "S",
      # This pair was written the other way round, which asks fct_recode() to
      # rename a level called "Not classificable". "U" is assumed to be the raw
      # code, like the other single-letter codes above.
      "Not classificable" = "U"
    ),
    smoke = fct_recode(smoke, "Yes" = "1", "No" = "0"),
    drink = fct_recode(drink, "Yes" = "1", "No" = "0"),
    low = fct_recode(low, "Yes" = "1", "No" = "0"),
    premie = fct_recode(premie, "Yes" = "1", "No" = "0")
  )
En este banco de datos no hay valores perdidos.
plural (N); mage (N); weeks (N); gained (N); tounces (N); tgrams (N)
# Mean and standard deviation of every numeric variable. summarise_at() with a
# named list replaces the deprecated summarise_each()/funs() pair and keeps the
# same `<variable>_<function>` column names.
ncbirth %>%
  summarise_at(
    vars(plural, mage, weeks, gained, tounces, tgrams),
    list(mean = mean, sd = sd)
  )
# One box plot per numeric variable; factor(1) places the single box on a
# dummy x axis. The first plot is a separate statement from the summary above.
ggplot(ncbirth, aes(x = factor(1), y = plural)) +
  geom_boxplot()
ggplot(ncbirth, aes(x = factor(1), y = mage)) +
  geom_boxplot()
ggplot(ncbirth, aes(x = factor(1), y = weeks)) +
  geom_boxplot()
ggplot(ncbirth, aes(x = factor(1), y = gained)) +
  geom_boxplot()
ggplot(ncbirth, aes(x = factor(1), y = tounces)) +
  geom_boxplot()
ggplot(ncbirth, aes(x = factor(1), y = tgrams)) +
  geom_boxplot()
sex (F)
# Frequency table for sex: group by the factor and count cases per level.
tabla_tipo <- ncbirth %>%
  group_by(sex) %>%
  summarise(n = n())
# Add each level's share of the total as a percentage (two decimals).
mutate(tabla_tipo, percent = round(100 * n / sum(n), 2))
# Horizontal bar chart of proportions; a separate statement from the mutate()
# call above (both on one line is a syntax error).
ggplot(ncbirth, aes(x = sex)) +
  geom_bar(aes(y = ..prop.., group = 1)) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  xlab("Sexo") + ylab("Porcentaje") +
  theme_bw()
marital (F)
# Frequency table for marital: group by the factor and count cases per level.
tabla_tipo <- ncbirth %>%
  group_by(marital) %>%
  summarise(n = n())
# Add each level's share of the total as a percentage (two decimals).
mutate(tabla_tipo, percent = round(100 * n / sum(n), 2))
# Horizontal bar chart of proportions; a separate statement from the mutate()
# call above (both on one line is a syntax error).
ggplot(ncbirth, aes(x = marital)) +
  geom_bar(aes(y = ..prop.., group = 1)) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  xlab("Estado") + ylab("Porcentaje") +
  theme_bw()
racemom (F)
# Frequency table for racemom: group by the factor and count cases per level.
tabla_tipo <- ncbirth %>%
  group_by(racemom) %>%
  summarise(n = n())
# Add each level's share of the total as a percentage (two decimals).
mutate(tabla_tipo, percent = round(100 * n / sum(n), 2))
# Horizontal bar chart of proportions; a separate statement from the mutate()
# call above (both on one line is a syntax error).
ggplot(ncbirth, aes(x = racemom)) +
  geom_bar(aes(y = ..prop.., group = 1)) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  xlab("Raza de la madre") + ylab("Porcentaje") +
  theme_bw()
hispmom (F)
# Frequency table for hispmom: group by the factor and count cases per level.
tabla_tipo <- ncbirth %>%
  group_by(hispmom) %>%
  summarise(n = n())
# Add each level's share of the total as a percentage (two decimals).
mutate(tabla_tipo, percent = round(100 * n / sum(n), 2))
# Horizontal bar chart of proportions; a separate statement from the mutate()
# call above (both on one line is a syntax error).
ggplot(ncbirth, aes(x = hispmom)) +
  geom_bar(aes(y = ..prop.., group = 1)) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  xlab("") + ylab("Porcentaje") +
  theme_bw()
smoke (F)
# Frequency table for smoke: group by the factor and count cases per level.
tabla_tipo <- ncbirth %>%
  group_by(smoke) %>%
  summarise(n = n())
# Add each level's share of the total as a percentage (two decimals).
mutate(tabla_tipo, percent = round(100 * n / sum(n), 2))
# Horizontal bar chart of proportions; a separate statement from the mutate()
# call above (both on one line is a syntax error).
ggplot(ncbirth, aes(x = smoke)) +
  geom_bar(aes(y = ..prop.., group = 1)) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  xlab("") + ylab("Porcentaje") +
  theme_bw()
drink (F)
# Frequency table for drink: group by the factor and count cases per level.
tabla_tipo <- ncbirth %>%
  group_by(drink) %>%
  summarise(n = n())
# Add each level's share of the total as a percentage (two decimals).
mutate(tabla_tipo, percent = round(100 * n / sum(n), 2))
# Horizontal bar chart of proportions; a separate statement from the mutate()
# call above (both on one line is a syntax error).
ggplot(ncbirth, aes(x = drink)) +
  geom_bar(aes(y = ..prop.., group = 1)) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  xlab("") + ylab("Porcentaje") +
  theme_bw()
low (F)
# Frequency table for low: group by the factor and count cases per level.
tabla_tipo <- ncbirth %>%
  group_by(low) %>%
  summarise(n = n())
# Add each level's share of the total as a percentage (two decimals).
mutate(tabla_tipo, percent = round(100 * n / sum(n), 2))
# Horizontal bar chart of proportions; a separate statement from the mutate()
# call above (both on one line is a syntax error).
ggplot(ncbirth, aes(x = low)) +
  geom_bar(aes(y = ..prop.., group = 1)) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  xlab("") + ylab("Porcentaje") +
  theme_bw()
premie (F)
# Frequency table for premie: group by the factor and count cases per level.
tabla_tipo <- ncbirth %>%
  group_by(premie) %>%
  summarise(n = n())
# Add each level's share of the total as a percentage (two decimals).
mutate(tabla_tipo, percent = round(100 * n / sum(n), 2))
# Horizontal bar chart of proportions; a separate statement from the mutate()
# call above (both on one line is a syntax error).
ggplot(ncbirth, aes(x = premie)) +
  geom_bar(aes(y = ..prop.., group = 1)) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  xlab("") + ylab("Porcentaje") +
  theme_bw()
mage y sex (N y F)
# Table: mean and standard deviation of mage for each level of sex. Plain
# summarise() replaces the deprecated summarise_each()/funs() pair; the result
# columns are still `mean` and `sd`.
ncbirth %>%
  group_by(sex) %>%
  summarise(mean = mean(mage), sd = sd(mage))
# Plot: box plot of mage per level of sex.
ggplot(ncbirth, aes(x = sex, y = mage)) +
  geom_boxplot() +
  labs(x = "Sexo", y = "Edad de la madre")
racemom y sex (F y F)
# Count each racemom/sex combination and express it as a percentage of the
# overall total (two decimals).
tabla_dbl <- ncbirth %>%
  count(racemom, sex) %>%
  mutate(porcentaje = round(100 * n / sum(n), 2))
tabla_dbl
# Counts, one column per level of racemom.
tabla_conteos <- select(tabla_dbl, racemom, sex, n)
spread(tabla_conteos, key = racemom, value = n)
# Percentages, one column per level of racemom.
tabla_porcentajes <- select(tabla_dbl, racemom, sex, porcentaje)
spread(tabla_porcentajes, key = racemom, value = porcentaje)
# Stacked bar chart: sex on the x axis, bars filled by racemom.
ggplot(ncbirth, aes(x = sex, fill = racemom)) +
  geom_bar() +
  labs(x = "Sexo", y = "Número de observaciones", fill = "racemom") +
  theme_bw()
smoke y drink (F y F)
# Count each smoke/drink combination and express it as a percentage of the
# overall total (two decimals).
tabla_dbl <- ncbirth %>%
  count(smoke, drink) %>%
  mutate(porcentaje = round(100 * n / sum(n), 2))
tabla_dbl
# Counts, one column per level of smoke.
tabla_conteos <- select(tabla_dbl, smoke, drink, n)
spread(tabla_conteos, key = smoke, value = n)
# Percentages, one column per level of smoke.
tabla_porcentajes <- select(tabla_dbl, smoke, drink, porcentaje)
spread(tabla_porcentajes, key = smoke, value = porcentaje)
# Stacked bar chart: drink on the x axis, bars filled by smoke.
ggplot(ncbirth, aes(x = drink, fill = smoke)) +
  geom_bar() +
  labs(x = "drink", y = "Número de observaciones", fill = "smoke") +
  theme_bw()
smoke y gained (F y N)
# Table: mean and standard deviation of gained for each level of smoke. Plain
# summarise() replaces the deprecated summarise_each()/funs() pair; the result
# columns are still `mean` and `sd`.
ncbirth %>%
  group_by(smoke) %>%
  summarise(mean = mean(gained), sd = sd(gained))
# Plot: box plot of gained per level of smoke.
ggplot(ncbirth, aes(x = smoke, y = gained)) +
  geom_boxplot() +
  labs(x = "smoke", y = "gained")
drink y gained (F y N)
# Table: mean and standard deviation of gained for each level of drink. Plain
# summarise() replaces the deprecated summarise_each()/funs() pair; the result
# columns are still `mean` and `sd`.
ncbirth %>%
  group_by(drink) %>%
  summarise(mean = mean(gained), sd = sd(gained))
# Plot: box plot of gained per level of drink.
ggplot(ncbirth, aes(x = drink, y = gained)) +
  geom_boxplot() +
  labs(x = "drink", y = "gained")
mage y gained (N y N)
# For each pair of numeric variables: correlation, then a scatter plot.
# Each section heading sits on its own line instead of being glued to the
# preceding labs() call.
cor(ncbirth$mage, ncbirth$gained)
ggplot(ncbirth, aes(x = mage, y = gained)) +
  geom_point() +
  labs(x = "mage", y = "gained")
mage y tgrams (N y N)
cor(ncbirth$mage, ncbirth$tgrams)
ggplot(ncbirth, aes(x = mage, y = tgrams)) +
  geom_point() +
  labs(x = "mage", y = "tgrams")
weeks y gained (N y N)
cor(ncbirth$weeks, ncbirth$gained)
ggplot(ncbirth, aes(x = weeks, y = gained)) +
  geom_point() +
  labs(x = "weeks", y = "gained")
weeks y tgrams (N y N)
cor(ncbirth$weeks, ncbirth$tgrams)
ggplot(ncbirth, aes(x = weeks, y = tgrams)) +
  geom_point() +
  labs(x = "weeks", y = "tgrams")
Copyright © 2018 Javier Morales. Universidad Miguel Hernández de Elche.