# Session setup: prefer fixed over scientific notation when printing numbers,
# then attach the packages the lab relies on.
options(scipen = 999)
library(tidyverse)   # pipe, dplyr verbs and ggplot2 used in every part below
library(socviz)
library(datasetsICR) # source of the FIFA data loaded in Part 1

# LAB 5
READ THIS DISCLAIMER: THIS QUARTO FILE OFFERS A BASIC OUTLINE FOR THE REPORT AND EXAMPLES OF SOME OF THE CODE.
THIS FILE IS NOT COMPLETE AND NOT INTENDED FOR YOU TO SIMPLY REPLICATE.
YOU WILL NEED TO WORK THROUGH ALL REQUIREMENTS ON YOUR OWN, INCLUDING INTERPRETATIONS, AESTHETICS, LABELS, COLORS, TITLES, SUBTITLES, ETC.
BE CREATIVE!
PART 1: PRACTICE USING PIPES (dplyr) TO SUMMARIZE DATA: TWO CATEGORICAL VARIABLES
# Count players for every Nationality x Preferred.Foot combination, then
# express each count as a share of that nationality's players and keep the
# five nationalities of interest.
data(FIFA)
pip1 <- FIFA %>%
  group_by(Nationality, Preferred.Foot) %>%
  # "drop_last" keeps the result grouped by Nationality, so sum(N) in the next
  # step is a per-nationality total rather than a grand total. Stating it
  # explicitly also silences summarise()'s regrouping message.
  summarize(N = n(), .groups = "drop_last") %>%
  mutate(
    freq = N / sum(N),
    pct = round(freq * 100, 0)
  ) %>%
  filter(Nationality %in% c("Spain", "Brazil", "France", "Italy", "Argentina"))
pip1
# A tibble: 15 × 5
# Groups: Nationality [5]
Nationality Preferred.Foot N freq pct
<fct> <fct> <int> <dbl> <dbl>
1 Argentina "" 1 0.00107 0
2 Argentina "Left" 214 0.228 23
3 Argentina "Right" 722 0.771 77
4 Brazil "" 2 0.00242 0
5 Brazil "Left" 245 0.296 30
6 Brazil "Right" 580 0.701 70
7 France "" 3 0.00328 0
8 France "Left" 236 0.258 26
9 France "Right" 675 0.739 74
10 Italy "" 3 0.00427 0
11 Italy "Left" 189 0.269 27
12 Italy "Right" 510 0.726 73
13 Spain "" 1 0.000933 0
14 Spain "Left" 298 0.278 28
15 Spain "Right" 773 0.721 72
PART 2: CREATE STACKED AND DODGED BAR CHARTS FROM 2 CATEGORICAL VARIABLES
# Title and caption shared by the bar charts in this part.
p_title <- "Preferred Foot by Nationality"
p_caption <- "FIFA dataset"
# AS STACKED BAR CHART
# Base plot reused by the bar charts below. In pip1 an unknown preferred foot
# is stored as the empty string "" rather than NA, so an is.na() check alone
# lets a blank third category (labelled 0) through; drop "" explicitly as well.
p <- ggplot(
  data = pip1 %>%
    filter(
      !is.na(Nationality),
      !is.na(Preferred.Foot),
      Preferred.Foot != ""
    ),
  mapping = aes(x = Nationality, y = pct, fill = Preferred.Foot)
)
# Stacked version: one column per nationality with the foot categories stacked
# on top of each other; each segment's percentage is printed at its midpoint.
p +
  labs(
    title = p_title,
    subtitle = "As a stacked bar chart",
    caption = p_caption,
    x = "Nationality",
    y = "Percent",
    fill = "Preferred Foot"
  ) +
  geom_col(position = "stack") +
  geom_text(
    mapping = aes(label = pct),
    position = position_stack(vjust = .5)
  )
# AS DODGED BAR CHART
# Dodged version: the foot categories sit side by side within each
# nationality, and each bar's percentage is drawn at the bar's top edge.
p +
  labs(
    title = p_title,
    subtitle = "As a dodged bar chart",
    caption = p_caption,
    x = "Nationality",
    y = "Percent",
    fill = "Preferred Foot"
  ) +
  geom_col(position = "dodge2") +
  geom_text(
    mapping = aes(label = pct),
    position = position_dodge(width = .9)
  )
# AS FACETED HORIZONTAL BAR CHART
# Faceted horizontal version: one panel per nationality, one bar per foot.
# The fill legend is switched off, so the preferred foot is moved onto the
# category axis; otherwise nothing in the figure says which bar is which.
# This also keeps every panel from reserving a slot for all five
# nationalities when it only ever draws one of them.
p +
  aes(x = Preferred.Foot) +
  geom_col() +
  # With a single bar per category, position_stack(vjust = .5) centres the
  # label inside its bar.
  geom_text(aes(label = pct), position = position_stack(vjust = .5)) +
  coord_flip() +
  facet_grid(~ Nationality) +
  guides(fill = "none") +
  labs(
    x = NULL, y = "Percent", fill = "Preferred Foot",
    title = p_title, caption = p_caption,
    subtitle = "As a faceted horizontal bar chart"
  )

# PART 3: PRACTICE USING PIPES (dplyr) TO SUMMARIZE DATA: TWO CONTINUOUS & ONE CATEGORICAL VARIABLE
# One row per nationality: player count plus mean age and mean weight
# (missing values ignored). Shares are computed against all nationalities in
# FIFA, so the filter to the five countries must come last.
pip2 <- FIFA %>%
  group_by(Nationality) %>%
  summarize(
    N = n(),
    age_mean = mean(Age, na.rm = TRUE),
    weight_mean = mean(Weight, na.rm = TRUE)
  ) %>%
  mutate(freq = N / sum(N)) %>%
  mutate(pct = round(100 * freq, 0)) %>%
  filter(
    Nationality %in% c("Argentina", "Brazil", "France", "Italy", "Spain")
  )
pip2
# A tibble: 5 × 6
Nationality N age_mean weight_mean freq pct
<fct> <int> <dbl> <dbl> <dbl> <dbl>
1 Argentina 937 26.2 75.1 0.0515 5
2 Brazil 827 27.6 76.0 0.0454 5
3 France 914 24.6 75.6 0.0502 5
4 Italy 702 25.9 76.0 0.0386 4
5 Spain 1072 25.3 74.0 0.0589 6
PART 4: SCATTERPLOT WITH A THIRD CATEGORICAL VARIABLE
# Scatterplot of mean age against mean weight, one point per nationality,
# with nationality mapped to colour and a free-text note placed at (75, 25).
p <- ggplot(pip2, aes(x = weight_mean, y = age_mean, color = Nationality))
p +
  geom_point(size = 5) +
  annotate(
    geom = "text", x = 75, y = 25,
    label = "France has the lowest average age.",
    hjust = 0 # left-align so the text starts at x = 75
  ) +
  labs(
    x = "Average Weight", y = "Average Age",
    title = "Age and Weight by Nationality",
    subtitle = "Spain has the lowest average weight.",
    caption = "FIFA dataset{datasetsICR}"
  )

# PART 5: LEGEND AND GUIDES
# Same scatterplot, now with a restyled legend title and the legend moved
# inside the plotting area.
p <- ggplot(pip2, aes(x = weight_mean, y = age_mean, color = Nationality))
p +
  geom_point(size = 5) +
  annotate(
    geom = "text", x = 74, y = 25,
    label = "Spain has the lowest average weight.",
    hjust = 0 # left-align so the text starts at x = 74
  ) +
  labs(
    x = "Average Weight", y = "Average Age",
    color = "Nationality",
    title = "Age and Weight by Nationality",
    subtitle = "Brazil has the highest average weight and age.",
    caption = "FIFA dataset{datasetsICR}"
  ) +
  theme(
    legend.title = element_text(color = "gray50", size = 14, face = "bold"),
    # Coordinates are fractions of the plot area (0-1 in each direction).
    # NOTE(review): ggplot2 >= 3.5.0 deprecates a numeric legend.position in
    # favour of legend.position = "inside" plus legend.position.inside; kept
    # as-is so older installs still work - confirm the installed version.
    legend.position = c(.2, .7)
  )

# PART 6: DATA LABELS VS LEGEND
# Same scatterplot with each point labelled directly by nationality, which
# makes the colour legend redundant, so the legend is removed.
p <- ggplot(pip2, aes(x = weight_mean, y = age_mean, color = Nationality))
p +
  geom_point(size = 5) +
  # hjust > 1 pushes each label to the left of its point.
  geom_text(mapping = aes(label = Nationality), hjust = 1.5, size = 3) +
  annotate(
    geom = "text", x = 73, y = 25,
    label = "Spain has the lowest average weight and the second lowest average age.",
    hjust = 0 # left-align so the text starts at x = 73
  ) +
  # No colour legend title is set here: the legend is hidden below, so the
  # title would never be drawn.
  labs(
    x = "Average Weight", y = "Average Age",
    title = "Age and Weight by Nationality",
    subtitle = "Spain has the lowest average weight.",
    caption = "FIFA dataset{datasetsICR}"
  ) +
  theme(legend.position = "none")

# PART 7: INTERPRETATION
The graphs show that, among the five nationalities compared, FIFA players from Brazil have the highest average age and the highest average weight. Spain has the lowest average weight and the second-lowest average age. France has the lowest average age and the third-highest average weight. Argentina has the second-highest average age and the second-lowest average weight. Italy has the second-highest average weight and the third-lowest average age. Overall, the differences are small: average weight varies by only about 2 kilograms and average age by about 3 years.