Many boast that America is the land of the free. But is its population truly represented in government? Let’s compare the numbers.
Here are some data summaries:
## Country.Name Country.Code Year
## Length:4945 Length:4945 Min. :1997
## Class :character Class :character 1st Qu.:2002
## Mode :character Mode :character Median :2008
## Mean :2008
## 3rd Qu.:2014
## Max. :2019
##
## Proportion.of.seats.held.by.women.in.national.parliaments....
## Min. :0.00000
## 1st Qu.:0.09673
## Median :0.16071
## Mean :0.17100
## 3rd Qu.:0.22449
## Max. :0.63750
## NA's :152
## name alpha.2 alpha.3 country.code
## Length:249 Length:249 Length:249 Min. : 4.0
## Class :character Class :character Class :character 1st Qu.:218.0
## Mode :character Mode :character Mode :character Median :434.0
## Mean :433.8
## 3rd Qu.:652.0
## Max. :894.0
##
## iso_3166.2 region sub.region intermediate.region
## Length:249 Length:249 Length:249 Length:249
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## region.code sub.region.code intermediate.region.code
## Min. : 2.00 Min. : 15.00 Min. : 5.00
## 1st Qu.: 9.00 1st Qu.: 53.75 1st Qu.: 11.00
## Median : 19.00 Median :154.00 Median : 14.00
## Mean : 65.95 Mean :179.87 Mean : 31.72
## 3rd Qu.:142.00 3rd Qu.:202.00 3rd Qu.: 29.00
## Max. :150.00 Max. :419.00 Max. :830.00
## NA's :1 NA's :1 NA's :142
# Number of calendar years spanned by the data, counting both endpoints.
len <- max(data$Year) - min(data$Year) + 1

# Placeholder table with one row per year in that span; both columns start
# out as NA.
yearly_global_averages <- data.frame(
  Year = rep(NA, len),
  Average_Proportion = rep(NA, len)
)
yearly_global_averages
## Year Average_Proportion
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
## 7 NA NA
## 8 NA NA
## 9 NA NA
## 10 NA NA
## 11 NA NA
## 12 NA NA
## 13 NA NA
## 14 NA NA
## 15 NA NA
## 16 NA NA
## 17 NA NA
## 18 NA NA
## 19 NA NA
## 20 NA NA
## 21 NA NA
## 22 NA NA
## 23 NA NA
# Fill the placeholder table: one row per consecutive calendar year, starting
# at the earliest year in `data`, together with the mean proportion of
# parliamentary seats held by women over all rows of `data` for that year.
# Missing proportions are ignored (na.rm = TRUE); a year with no non-missing
# values yields NaN.
yearly_global_averages$Year <- min(data$Year) + seq_len(len) - 1

yearly_global_averages$Average_Proportion <- vapply(
  yearly_global_averages$Year,
  function(yr) {
    # Rows with a missing Year never match, mirroring subset()'s NA handling.
    in_year <- !is.na(data$Year) & data$Year == yr
    seats <- data[["Proportion.of.seats.held.by.women.in.national.parliaments...."]]
    mean(seats[in_year], na.rm = TRUE)
  },
  numeric(1)
)
yearly_global_averages
## Year Average_Proportion
## 1 1997 0.1046606
## 2 1998 0.1114280
## 3 1999 0.1150949
## 4 2000 0.1190276
## 5 2001 0.1235919
## 6 2002 0.1365661
## 7 2003 0.1403334
## 8 2004 0.1466991
## 9 2005 0.1558366
## 10 2006 0.1593915
## 11 2007 0.1697290
## 12 2008 0.1722057
## 13 2009 0.1786979
## 14 2010 0.1799222
## 15 2011 0.1857965
## 16 2012 0.1943831
## 17 2013 0.2025219
## 18 2014 0.2059669
## 19 2015 0.2112004
## 20 2016 0.2148817
## 21 2017 0.2191850
## 22 2018 0.2250185
## 23 2019 0.2323173
# Scatter plot of the global yearly average: one point per row of
# yearly_global_averages, Year on the x axis and Average_Proportion on the y.
p <- ggplot(yearly_global_averages, aes(x = Year, y = Average_Proportion))
p <- p + geom_point()
p
# Attach region columns by matching Country.Name against region_data$name.
# Every row of `data` is kept (all.x = TRUE); rows of region_data without a
# matching country are dropped (all.y = FALSE).
data <- merge(
  x = data,
  y = region_data,
  by.x = "Country.Name",
  by.y = "name",
  all.x = TRUE,
  all.y = FALSE
)

# Add the per-year global average to each row, joining on Year.
data <- merge(x = data, y = yearly_global_averages, by = "Year")
# Keep only the rows for the seven countries listed below.
countries_with_highest_GDPs <- subset(
  data,
  Country.Name %in% c(
    "United States",
    "China",
    "Japan",
    "Germany",
    "India",
    "United Kingdom",
    "France"
  )
)

# Override sub.region for two countries; every other row keeps its value.
# NOTE(review): presumably these two names found no match in the region table
# during the merge and were left without a sub.region -- confirm.
countries_with_highest_GDPs$sub.region <- with(
  countries_with_highest_GDPs,
  ifelse(
    Country.Name == "United States",
    "Americas",
    ifelse(Country.Name == "United Kingdom", "Northern Europe", sub.region)
  )
)
# Per-country proportion of parliamentary seats held by women over time.
# Points: shape identifies the country, color identifies its sub.region.
# Gray line: the global yearly average (Average_Proportion).
p <- ggplot(
  data = countries_with_highest_GDPs,
  mapping = aes(
    x = Year,
    y = Proportion.of.seats.held.by.women.in.national.parliaments....,
    shape = Country.Name,
    color = sub.region
  )
) +
  # Rows with a missing proportion are dropped without a warning.
  geom_point(na.rm = TRUE, size = 3, alpha = .8) +
  # Shape codes 0-9 are made available to the shape scale.
  scale_shape_manual(values = seq(0, 9)) +
  geom_line(
    data = countries_with_highest_GDPs,
    aes(x = Year, y = Average_Proportion),
    color = "gray"
  ) +
  ggtitle("Proportion of Women in Political Leadership") +
  ylab("Proportion")

# Convert to an interactive plotly widget first, then restyle its traces;
# style() operates on the plotly representation of the figure.
p <- ggplotly(p)

# Hide the legend entries of traces 8 to 14.
# NOTE(review): presumably these are the gray average-line traces that follow
# the seven per-country point traces -- confirm if the country list changes.
p <- style(p, showlegend = FALSE, traces = 8:14)
p