Easy Test for R org GSoC 2022: To make an age pyramid for a city of a given state and then compare it with the state’s capital, we need data from the 2010 decennial census at the appropriate geographic level. One fantastic way to do that is to get the data through the US Census Bureau’s API. There is ample documentation on Summary File 1 (SF1) that gives the user an overview of the variables it contains and what they represent. In order to get an accurate representation of the different age groups by gender, I wanted to plot the male and female populations separately. The PCT12 group of SF1 had the variables I required. The required results can be viewed by using the API: https://api.census.gov/data/2010/dec/sf1?get=group(PCT12)NAME&for=place:*&in=state:06&key=Your key goes here where place is used to get the data for the state of California at the CDP level. The resulting data can then be saved in your desired format.

# Load packages
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
# Load data
# NOTE(review): absolute, machine-specific path -- point this at wherever the
# workbook lives on your machine.
data <- read_excel("C:/Users/ADMIN/Downloads/Age_by_sex_california.xlsx")
head(data)
# Reshape to long format: columns 2:3 (one per city) are stacked into
# `city` / `Population` pairs, giving one row per age group and city.
data <- data %>%
  pivot_longer(names_to = 'city', values_to = 'Population', cols = 2:3) %>%
  # Group by city so that sum(Population) is that city's own total; without
  # the grouping every share would be taken over both cities combined.
  group_by(city) %>%
  mutate(
    # +1 for the city drawn to the right of zero, -1 for the one on the left.
    # NOTE(review): 'San Francicso' must match the workbook's column header
    # character for character -- confirm the spelling used in the file.
    signal = if_else(city == 'San Francicso', 1, -1),
    # Signed share (in %) of each age group within its city; the negative
    # sign mirrors the second city to form the pyramid.
    PopPerc = signal * round(Population / sum(Population) * 100, 2)
  ) %>%
  ungroup()
head(data)
levels(data$Age)
## NULL
# Age is read as plain text (no levels), which ggplot would sort
# alphabetically. Freeze the order in which the age groups appear in the data.
data$Age <- factor(data$Age, levels = unique(data$Age), ordered = TRUE)

While manipulating the data, the city column is used to store the names of the cities, and the values for the corresponding city are stored in the Population column. mutate() is used to calculate the percentage of each age group per city. The Sacramento values are then made negative so that the two cities extend in opposite directions and form a pyramid.

# Plot the age pyramid for the total population
ggplot(data) +
  # One bar per age group and city; PopPerc is signed, so the two cities
  # extend in opposite directions from zero.
  geom_bar(aes(x = Age, y = PopPerc, fill = city), stat = 'identity') +
  # Print each bar's share just beyond its tip: `signal` (+1 / -1) pushes the
  # label outward, abs() hides the sign that only encodes the side.
  geom_text(aes(x = Age, y = PopPerc + signal * .3,
                label = paste0(abs(PopPerc), '%'))) +
  # this is used to flip the chart
  coord_flip() +
  # adds colour to the pyramid blocks
  scale_fill_manual(name = '', values = c('green', 'Darkgreen')) +
  scale_y_continuous(breaks = seq(-10, 10, 1),
                     labels = function(x) {
                       paste(abs(x), '%')
                     }) +
  # Padding around the age axis. This has to be its own layer: nested inside
  # labs() it is silently discarded.
  # NOTE(review): the expansion values are taken over unchanged -- tune them
  # if the padding turns out too generous.
  scale_x_discrete(expand = expansion(mult = c(0.8, 1), add = c(0, 0.2))) +
  labs(x = 'Age', y = 'population',
       title = 'Total population of sacramento vs san francisco(US Census 2010) ',
       subtitle = '.',
       caption = '.') +
  # Theme elements refer to the axes as drawn, so after coord_flip() the
  # `.x` settings below hide the text and ticks of the horizontal
  # (percentage) axis.
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.title = element_text(size = 12),
        panel.background = element_rect(fill = "light yellow", colour = "yellow"),
        legend.position = 'top',
        legend.justification = 'center')

# The theme function is used to make the chart aesthetically pleasing. I used the yellow background because it makes the colours pop out more.

For male population:

library(ggplot2)
library(tidyverse)
library(readxl)
# Load data
# NOTE(review): absolute, machine-specific path -- point this at wherever the
# workbook lives on your machine.
data_m <- read_excel("C:/Users/ADMIN/Downloads/Age_by_sex_california.xlsx",
                     sheet = "Males")
# Reshape to long format: columns 2:3 (one per city) are stacked into
# `city` / `Population` pairs, giving one row per age group and city.
data_m <- data_m %>%
  pivot_longer(names_to = 'city', values_to = 'Population', cols = 2:3) %>%
  # Group by city so that sum(Population) is that city's own total; without
  # the grouping every share would be taken over both cities combined.
  group_by(city) %>%
  mutate(
    # +1 for the city drawn to the right of zero, -1 for the one on the left.
    # NOTE(review): 'San Francicso' must match the workbook's column header
    # character for character -- confirm the spelling used in the file.
    signal = if_else(city == 'San Francicso', 1, -1),
    # Signed share (in %) of each age group within its city; the negative
    # sign mirrors the second city to form the pyramid.
    PopPerc = signal * round(Population / sum(Population) * 100, 2)
  ) %>%
  ungroup()
head(data_m)
levels(data_m$Age)
## NULL
# Age is read as plain text (no levels), which ggplot would sort
# alphabetically. Freeze the order in which the age groups appear in the male
# data -- this must target data_m, the table that is plotted next.
data_m$Age <- factor(data_m$Age, levels = unique(data_m$Age), ordered = TRUE)
# Plot the age pyramid for the male population
ggplot(data_m) +
  # One bar per age group and city; PopPerc is signed, so the two cities
  # extend in opposite directions from zero.
  geom_bar(aes(x = Age, y = PopPerc, fill = city), stat = 'identity') +
  # Print each bar's share just beyond its tip: `signal` (+1 / -1) pushes the
  # label outward, abs() hides the sign that only encodes the side.
  geom_text(aes(x = Age, y = PopPerc + signal * .3,
                label = paste0(abs(PopPerc), '%'))) +
  # this is used to flip the chart
  coord_flip() +
  # adds colour to the pyramid blocks
  scale_fill_manual(name = '', values = c('blue', 'Darkblue')) +
  scale_y_continuous(breaks = seq(-10, 10, 1),
                     labels = function(x) {
                       paste(abs(x), '%')
                     }) +
  # Padding around the age axis. This has to be its own layer: nested inside
  # labs() it is silently discarded.
  # NOTE(review): the expansion values are taken over unchanged -- tune them
  # if the padding turns out too generous.
  scale_x_discrete(expand = expansion(mult = c(0.8, 1), add = c(0, 0.2))) +
  labs(x = 'Age', y = 'population',
       title = 'Male population of sacramento vs san francisco(US Census 2010) ',
       subtitle = '.',
       caption = '.') +
  # Theme elements refer to the axes as drawn, so after coord_flip() the
  # `.x` settings below hide the text and ticks of the horizontal
  # (percentage) axis.
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.title = element_text(size = 12),
        panel.background = element_rect(fill = "light yellow", colour = "yellow"),
        legend.position = 'top',
        legend.justification = 'center')

# The theme function is used to make the chart aesthetically pleasing. I used the yellow background because it makes the colours pop out more.

For the female population:

# Plot the age pyramid for the female population; same code as for the male
# population, with the data switched to the female table.
# NOTE(review): `data_f` is not created anywhere in the code shown here. It
# has to be prepared from the workbook's female sheet in the same way as
# `data_m` (reshape, per-city percentages, `signal`, ordered `Age`) before
# this plot can run.
ggplot(data_f) +
  # One bar per age group and city; PopPerc is signed, so the two cities
  # extend in opposite directions from zero.
  geom_bar(aes(x = Age, y = PopPerc, fill = city), stat = 'identity') +
  # Print each bar's share just beyond its tip: `signal` (+1 / -1) pushes the
  # label outward, abs() hides the sign that only encodes the side.
  geom_text(aes(x = Age, y = PopPerc + signal * .3,
                label = paste0(abs(PopPerc), '%'))) +
  # this is used to flip the chart
  coord_flip() +
  # adds colour to the pyramid blocks
  scale_fill_manual(name = '', values = c('lightpink', 'violet')) +
  scale_y_continuous(breaks = seq(-10, 10, 1),
                     labels = function(x) {
                       paste(abs(x), '%')
                     }) +
  # Padding around the age axis. This has to be its own layer: nested inside
  # labs() it is silently discarded.
  # NOTE(review): the expansion values are taken over unchanged -- tune them
  # if the padding turns out too generous.
  scale_x_discrete(expand = expansion(mult = c(0.8, 1), add = c(0, 0.2))) +
  labs(x = 'Age', y = 'population',
       title = 'Female population of sacramento vs san francisco(US Census 2010) ',
       subtitle = '.',
       caption = '.') +
  # Theme elements refer to the axes as drawn, so after coord_flip() the
  # `.x` settings below hide the text and ticks of the horizontal
  # (percentage) axis.
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.title = element_text(size = 12),
        panel.background = element_rect(fill = "light yellow", colour = "yellow"),
        legend.position = 'top',
        legend.justification = 'center')