library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Column layout shared by both state exports. The types mirror what readr
## previously guessed for these files: five character columns plus a numeric
## Population column.
population_cols <- cols(
  Notes = col_character(),
  `Age Group` = col_character(),
  `Age Group Code` = col_character(),
  Gender = col_character(),
  `Gender Code` = col_character(),
  Population = col_double()
)

## Read the tab-delimited state files with an explicit column specification,
## so the column types do not depend on guessing and readr does not print the
## column-specification message on every run.
## NOTE(review): readr reported "One or more parsing issues" for both files
## when they were last read; inspect them with problems(TX) / problems(CA).
## The cause is not visible from this script.
TX <- read_delim("Texas.txt", delim = "\t", col_types = population_cols)
CA <- read_delim("California.txt", delim = "\t", col_types = population_cols)
## Shared clean-up for both state tables: keep only the rows that have a
## Gender value, then turn `Age Group` into a factor whose levels follow the
## order in which the groups first appear, so plots keep that order instead of
## sorting the labels alphabetically.
prepare_age_groups <- function(state_tbl) {
  state_tbl <- state_tbl[!is.na(state_tbl$Gender), ]
  state_tbl$`Age Group` <- factor(
    state_tbl$`Age Group`,
    levels = unique(state_tbl$`Age Group`)
  )
  state_tbl
}

TX <- prepare_age_groups(TX)
CA <- prepare_age_groups(CA)
## Distribution
## Texas
## Line chart of Population by Age Group, with one line (and one set of
## points) per Gender.
ggplot(
  TX,
  aes(x = `Age Group`, y = Population, group = Gender, color = Gender)
) +
  ## Line thickness is set with `linewidth`; using `size` for lines was
  ## deprecated in ggplot2 3.4.0.
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  labs(
    title = "Age Distribution by Gender, Texas",
    x = "Age Group",
    y = "Population"
  ) +
  ## Show the population axis with thousands separators.
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  ## Tilt the age-group labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## California
## Line chart of Population by Age Group, with one line (and one set of
## points) per Gender.
ggplot(
  CA,
  aes(x = `Age Group`, y = Population, group = Gender, color = Gender)
) +
  ## Line thickness is set with `linewidth`; using `size` for lines was
  ## deprecated in ggplot2 3.4.0.
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  labs(
    title = "Age Distribution by Gender, California",
    x = "Age Group",
    y = "Population"
  ) +
  ## Show the population axis with thousands separators.
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  ## Tilt the age-group labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## Give male counts a negative sign so that, in a bar chart, they extend in
## the opposite direction from the other rows. Population is overwritten in
## place, so running this step a second time flips the sign back.
gender_sign <- function(gender) ifelse(gender == "Male", -1, 1)

TX <- TX %>% mutate(Population = gender_sign(Gender) * Population)
CA <- CA %>% mutate(Population = gender_sign(Gender) * Population)
## Population Pyramid
## TX
## Horizontal bars of Population per Age Group, filled by Gender. Bars are
## drawn from the stored Population values, while the axis labels show
## absolute values, so negative values appear on the opposite side but are
## labelled as positive counts.
ggplot(TX, aes(x = `Age Group`, y = Population, fill = Gender)) +
  geom_col(width = 0.7) +
  scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) +
  scale_y_continuous(
    limits = c(-1, 1) * 1600000,
    breaks = seq(from = -1600000, to = 1600000, by = 400000),
    labels = function(value) format(abs(value), big.mark = ",")
  ) +
  ## Flip the axes so the age groups run along the vertical axis.
  coord_flip() +
  labs(
    title = "Population Pyramid by Gender, Texas",
    x = "Age Group",
    y = "Population"
  ) +
  theme_minimal() +
  theme(legend.title = element_blank())

## CA
## Horizontal bars of Population per Age Group, filled by Gender. Bars are
## drawn from the stored Population values, while the axis labels show
## absolute values, so negative values appear on the opposite side but are
## labelled as positive counts.
## The axis bound is defined once and reused for both the breaks and the
## limits so the two cannot drift apart (the breaks previously started at
## -16,000,000 while the limits stopped at -1,600,000).
## NOTE(review): bars whose value falls outside +/- ca_axis_max are dropped by
## the scale limits; confirm this bound is large enough for the California
## data.
ca_axis_max <- 1600000

ggplot(CA, aes(x = `Age Group`, y = Population, fill = Gender)) +
  geom_col(width = 0.7) +
  ## Flip the axes so the age groups run along the vertical axis.
  coord_flip() +
  scale_y_continuous(
    breaks = seq(-ca_axis_max, ca_axis_max, by = 400000),
    labels = function(x) format(abs(x), big.mark = ","),
    limits = c(-ca_axis_max, ca_axis_max)
  ) +
  labs(
    title = "Population Pyramid by Gender, California",
    x = "Age Group",
    y = "Population"
  ) +
  scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) +
  theme_minimal() +
  theme(legend.title = element_blank())
