Background

Many boast that America is the land of the free. But is its population truly represented in government? Let’s compare the numbers.

Here are some data summaries:

##  Country.Name       Country.Code            Year     
##  Length:4945        Length:4945        Min.   :1997  
##  Class :character   Class :character   1st Qu.:2002  
##  Mode  :character   Mode  :character   Median :2008  
##                                        Mean   :2008  
##                                        3rd Qu.:2014  
##                                        Max.   :2019  
##                                                      
##  Proportion.of.seats.held.by.women.in.national.parliaments....
##  Min.   :0.00000                                              
##  1st Qu.:0.09673                                              
##  Median :0.16071                                              
##  Mean   :0.17100                                              
##  3rd Qu.:0.22449                                              
##  Max.   :0.63750                                              
##  NA's   :152
##      name             alpha.2            alpha.3           country.code  
##  Length:249         Length:249         Length:249         Min.   :  4.0  
##  Class :character   Class :character   Class :character   1st Qu.:218.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :434.0  
##                                                           Mean   :433.8  
##                                                           3rd Qu.:652.0  
##                                                           Max.   :894.0  
##                                                                          
##   iso_3166.2           region           sub.region        intermediate.region
##  Length:249         Length:249         Length:249         Length:249         
##  Class :character   Class :character   Class :character   Class :character   
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character   
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##   region.code     sub.region.code  intermediate.region.code
##  Min.   :  2.00   Min.   : 15.00   Min.   :  5.00          
##  1st Qu.:  9.00   1st Qu.: 53.75   1st Qu.: 11.00          
##  Median : 19.00   Median :154.00   Median : 14.00          
##  Mean   : 65.95   Mean   :179.87   Mean   : 31.72          
##  3rd Qu.:142.00   3rd Qu.:202.00   3rd Qu.: 29.00          
##  Max.   :150.00   Max.   :419.00   Max.   :830.00          
##  NA's   :1        NA's   :1        NA's   :142

Average Global Proportions by Year

# Count the calendar years covered by the data, first and last inclusive.
len <- max(data$Year) - min(data$Year) + 1
# Empty results table with one row per year; both columns start out as NA
# and are populated further down.
yearly_global_averages <- data.frame(
  Year = rep(NA, len),
  Average_Proportion = rep(NA, len)
)
yearly_global_averages
##    Year Average_Proportion
## 1    NA                 NA
## 2    NA                 NA
## 3    NA                 NA
## 4    NA                 NA
## 5    NA                 NA
## 6    NA                 NA
## 7    NA                 NA
## 8    NA                 NA
## 9    NA                 NA
## 10   NA                 NA
## 11   NA                 NA
## 12   NA                 NA
## 13   NA                 NA
## 14   NA                 NA
## 15   NA                 NA
## 16   NA                 NA
## 17   NA                 NA
## 18   NA                 NA
## 19   NA                 NA
## 20   NA                 NA
## 21   NA                 NA
## 22   NA                 NA
## 23   NA                 NA
# Fill the results table: one row per calendar year, starting at the earliest
# year in `data` and advancing by one for each of the `len` rows.
yearly_global_averages$Year <- min(data$Year) + seq_len(len) - 1

# For each year, average the women's seat proportion over every row of `data`
# recorded for that year. Missing proportions are skipped (na.rm = TRUE); a
# year with no non-missing values yields NaN.
yearly_global_averages$Average_Proportion <- vapply(
  yearly_global_averages$Year,
  function(yr) {
    # Rows with a missing Year never match, mirroring subset()'s treatment
    # of NA conditions.
    in_year <- !is.na(data$Year) & data$Year == yr
    mean(
      data$Proportion.of.seats.held.by.women.in.national.parliaments....[in_year],
      na.rm = TRUE
    )
  },
  numeric(1)
)
yearly_global_averages
##    Year Average_Proportion
## 1  1997          0.1046606
## 2  1998          0.1114280
## 3  1999          0.1150949
## 4  2000          0.1190276
## 5  2001          0.1235919
## 6  2002          0.1365661
## 7  2003          0.1403334
## 8  2004          0.1466991
## 9  2005          0.1558366
## 10 2006          0.1593915
## 11 2007          0.1697290
## 12 2008          0.1722057
## 13 2009          0.1786979
## 14 2010          0.1799222
## 15 2011          0.1857965
## 16 2012          0.1943831
## 17 2013          0.2025219
## 18 2014          0.2059669
## 19 2015          0.2112004
## 20 2016          0.2148817
## 21 2017          0.2191850
## 22 2018          0.2250185
## 23 2019          0.2323173
# Scatter plot of the global average proportion for each year.
p <- ggplot(yearly_global_averages, aes(x = Year, y = Average_Proportion)) +
  geom_point()
p

Graphing and Comparing Select Countries

# Attach the region metadata to every observation. This is a left join on the
# country's display name, so a row whose Country.Name has no exact match in
# region_data$name keeps NA in the region columns.
data <- merge(
  data, region_data,
  by.x = "Country.Name", by.y = "name",
  all.x = TRUE, all.y = FALSE
)
# Add each year's global average as an Average_Proportion column.
data <- merge(data, yearly_global_averages, by = "Year")

# Keep only the seven countries being compared.
# NOTE(review): the variable name implies these are the highest-GDP
# countries; the list is hard-coded here rather than derived from GDP data.
countries_with_highest_GDPs <- subset(
  data,
  Country.Name %in% c(
    "United States",
    "China",
    "Japan",
    "Germany",
    "India",
    "United Kingdom",
    "France"
  )
)

# Set sub.region by hand for two of the countries.
# NOTE(review): presumably their names did not match region_data$name in the
# join above, leaving sub.region as NA -- confirm. Joining on
# Country.Code / alpha.3 instead may make these overrides unnecessary.
# NOTE(review): "Americas" looks like a region-level label rather than a
# sub-region -- confirm it is the intended legend entry.
countries_with_highest_GDPs$sub.region[
  countries_with_highest_GDPs$Country.Name == "United States"
] <- "Americas"
countries_with_highest_GDPs$sub.region[
  countries_with_highest_GDPs$Country.Name == "United Kingdom"
] <- "Northern Europe"


# Points: one marker shape per country, coloured by sub-region.
# Gray line: the global yearly average carried on each row by the merge above.
p <- ggplot(
  data = countries_with_highest_GDPs,
  mapping = aes(
    x = Year,
    y = Proportion.of.seats.held.by.women.in.national.parliaments....,
    shape = Country.Name,
    color = sub.region
  )
) +
  geom_point(na.rm = TRUE, size = 3, alpha = .8) +
  # Shape codes 0-9: more than the seven countries need.
  scale_shape_manual(values = seq(0, 9)) +
  geom_line(
    data = countries_with_highest_GDPs,
    aes(x = Year, y = Average_Proportion),
    color = "gray"
  ) +
  ggtitle("Proportion of Women in Political Leadership") +
  ylab("Proportion")

# Convert to an interactive plotly figure, then hide the legend entries for
# traces 8 through 14.
# NOTE(review): presumably traces 8-14 are the gray average-line traces (one
# per country) -- re-check these indices if layers or countries change.
p <- ggplotly(p)
p <- style(p, showlegend = FALSE, traces = 8:14)

p