library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Remote location of ws.csv in the mcfrank/tidyverse-tutorial repository.
url <- "https://raw.githubusercontent.com/mcfrank/tidyverse-tutorial/master/data/ws.csv"

# Download the CSV and parse it into a base data.frame.
df <- read.csv(file = url)

# Show the first rows (head() defaults to six) to check the columns loaded.
head(df)
## data_id age comprehension production language form birth_order ethnicity
## 1 51699 27 497 497 English WS Fourth Hispanic
## 2 51700 21 369 369 English WS Second White
## 3 51701 26 190 190 English WS Fourth White
## 4 51702 27 264 264 English WS Second White
## 5 51703 19 159 159 English WS Second Other
## 6 51704 30 513 513 English WS Second Other
## sex zygosity norming longitudinal source_name mom_ed
## 1 Female NA TRUE FALSE Marchman Some Secondary
## 2 Female NA TRUE FALSE Marchman Secondary
## 3 Female NA TRUE FALSE Marchman College
## 4 Male NA TRUE FALSE Marchman Secondary
## 5 Female NA TRUE FALSE Marchman Secondary
## 6 Female NA TRUE FALSE Marchman Secondary
Promedio de la comprensión para cada etnia
# Mean comprehension score per ethnicity; rows whose ethnicity is NA form
# their own group in the result.
df %>%
  group_by(ethnicity) %>%
  summarise(promedio = mean(comprehension))
## # A tibble: 6 x 2
## ethnicity promedio
## <chr> <dbl>
## 1 Asian 273.
## 2 Black 289.
## 3 Hispanic 221.
## 4 Other 223.
## 5 White 281.
## 6 <NA> 256.
Promedio de la producción para cada sexo
# Mean production score per sex; rows whose sex is NA form their own group
# in the result.
df %>%
  group_by(sex) %>%
  summarise(promedio = mean(production))
## # A tibble: 3 x 2
## sex promedio
## <chr> <dbl>
## 1 Female 297.
## 2 Male 260.
## 3 <NA> 233.
# Histogram of participant ages. freq = FALSE plots densities rather than
# raw counts, so the bar areas sum to one.
hist(
  df$age,
  freq = FALSE,
  col = "blue",
  main = "Histograma de la distribución por edad de los participantes",
  xlab = "Age"
)
