# Keep only the rows of wb_data for the indicator
# "Government expenditure on education, total (% of GDP)".
Gov.exp <- filter(
  wb_data,
  Indicator.Name == "Government expenditure on education, total (% of GDP)"
)
head(Gov.exp, 20)
## # A tibble: 20 × 64
## ...1 Country.Name Country.Code Indicator.Name Indicator.Code X1960 X1961
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 8 Aruba ABW Government ex… SE.XPD.TOTL.G… NA NA
## 2 166 Afghanistan AFG Government ex… SE.XPD.TOTL.G… NA NA
## 3 324 Angola AGO Government ex… SE.XPD.TOTL.G… NA NA
## 4 482 Albania ALB Government ex… SE.XPD.TOTL.G… NA NA
## 5 640 Andorra AND Government ex… SE.XPD.TOTL.G… NA NA
## 6 798 Arab World ARB Government ex… SE.XPD.TOTL.G… NA NA
## 7 956 United Arab Emi… ARE Government ex… SE.XPD.TOTL.G… NA NA
## 8 1114 Argentina ARG Government ex… SE.XPD.TOTL.G… NA NA
## 9 1272 Armenia ARM Government ex… SE.XPD.TOTL.G… NA NA
## 10 1430 American Samoa ASM Government ex… SE.XPD.TOTL.G… NA NA
## 11 1588 Antigua and Bar… ATG Government ex… SE.XPD.TOTL.G… NA NA
## 12 1746 Australia AUS Government ex… SE.XPD.TOTL.G… NA NA
## 13 1904 Austria AUT Government ex… SE.XPD.TOTL.G… NA NA
## 14 2062 Azerbaijan AZE Government ex… SE.XPD.TOTL.G… NA NA
## 15 2220 Burundi BDI Government ex… SE.XPD.TOTL.G… NA NA
## 16 2378 Belgium BEL Government ex… SE.XPD.TOTL.G… NA NA
## 17 2536 Benin BEN Government ex… SE.XPD.TOTL.G… NA NA
## 18 2694 Burkina Faso BFA Government ex… SE.XPD.TOTL.G… NA NA
## 19 2852 Bangladesh BGD Government ex… SE.XPD.TOTL.G… NA NA
## 20 3010 Bulgaria BGR Government ex… SE.XPD.TOTL.G… NA NA
## # ℹ 57 more variables: X1962 <dbl>, X1963 <dbl>, X1964 <dbl>, X1965 <dbl>,
## # X1966 <dbl>, X1967 <dbl>, X1968 <dbl>, X1969 <dbl>, X1970 <dbl>,
## # X1971 <dbl>, X1972 <dbl>, X1973 <dbl>, X1974 <dbl>, X1975 <dbl>,
## # X1976 <dbl>, X1977 <dbl>, X1978 <dbl>, X1979 <dbl>, X1980 <dbl>,
## # X1981 <dbl>, X1982 <dbl>, X1983 <dbl>, X1984 <dbl>, X1985 <dbl>,
## # X1986 <dbl>, X1987 <dbl>, X1988 <dbl>, X1989 <dbl>, X1990 <dbl>,
## # X1991 <dbl>, X1992 <dbl>, X1993 <dbl>, X1994 <dbl>, X1995 <dbl>, …
# Reshape country plus the 1990 and 2014 columns into long format.
# Columns are selected by name instead of by position (2, 36, 60) so the
# selection cannot silently pick the wrong years if the column layout changes.
DF <- melt(Gov.exp[, c("Country.Name", "X1990", "X2014")])
## Using Country.Name as id variables
head(DF, 20)
## Country.Name variable value
## 1 Aruba X1990 NA
## 2 Afghanistan X1990 NA
## 3 Angola X1990 NA
## 4 Albania X1990 NA
## 5 Andorra X1990 NA
## 6 Arab World X1990 NA
## 7 United Arab Emirates X1990 NA
## 8 Argentina X1990 1.06738
## 9 Armenia X1990 NA
## 10 American Samoa X1990 NA
## 11 Antigua and Barbuda X1990 NA
## 12 Australia X1990 4.67038
## 13 Austria X1990 4.97711
## 14 Azerbaijan X1990 NA
## 15 Burundi X1990 3.35722
## 16 Belgium X1990 NA
## 17 Benin X1990 NA
## 18 Burkina Faso X1990 NA
## 19 Bangladesh X1990 1.51894
## 20 Bulgaria X1990 4.45406
# Remove rows with missing values first; plotting them would trigger a warning.
DF.NA <- na.omit(DF)

# Point plot: expenditure value on x, one row (and colour) per year column.
p <- ggplot(
  data = DF.NA,
  mapping = aes(x = value, y = factor(variable), colour = factor(variable))
) +
  geom_point() +
  labs(
    title = "Government Expenditures on Education for 1990 & 2014",
    x = "Value",
    y = "Year",
    caption = "Voronyak 2023"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

# Show the plot with a legend titled "Year" and plain year labels.
p + scale_color_discrete(name = "Year", labels = c("1990", "2014"))
# Reshape wb_data from one column per year into one row per (series, year).
# Only genuine year columns ("X" followed by four digits) are gathered. The
# previous range X1960:X also gathered the column named "X", which produced
# rows whose year was "X" and that turn into NA once the year is made numeric.
# NOTE(review): "X" is presumably the empty trailing column of the source CSV;
# confirm it carries no data before relying on it being dropped here.
long_year_data <- wb_data %>%
  dplyr::select(-dplyr::any_of("X")) %>%
  gather(
    key = year,     # new column holding the former column names ("X1960", ...)
    value = value,  # new column holding the cell values
    matches("^X[0-9]{4}$")
  )
head(long_year_data, 20)
## # A tibble: 20 × 7
## ...1 Country.Name Country.Code Indicator.Name Indicator.Code year value
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 1 Aruba ABW Population ages 1… SP.POP.1564.T… X1960 53.7
## 2 2 Aruba ABW Population ages 0… SP.POP.0014.T… X1960 43.8
## 3 3 Aruba ABW Unemployment, tot… SL.UEM.TOTL.ZS X1960 NA
## 4 4 Aruba ABW Unemployment, mal… SL.UEM.TOTL.M… X1960 NA
## 5 5 Aruba ABW Unemployment, fem… SL.UEM.TOTL.F… X1960 NA
## 6 6 Aruba ABW Labor force, total SL.TLF.TOTL.IN X1960 NA
## 7 7 Aruba ABW Labor force, fema… SL.TLF.TOTL.F… X1960 NA
## 8 8 Aruba ABW Government expend… SE.XPD.TOTL.G… X1960 NA
## 9 9 Aruba ABW Government expend… SE.XPD.TOTL.G… X1960 NA
## 10 10 Aruba ABW Expenditure on te… SE.XPD.TERT.ZS X1960 NA
## 11 11 Aruba ABW Government expend… SE.XPD.TERT.P… X1960 NA
## 12 12 Aruba ABW Expenditure on se… SE.XPD.SECO.ZS X1960 NA
## 13 13 Aruba ABW Government expend… SE.XPD.SECO.P… X1960 NA
## 14 14 Aruba ABW Expenditure on pr… SE.XPD.PRIM.ZS X1960 NA
## 15 15 Aruba ABW Government expend… SE.XPD.PRIM.P… X1960 NA
## 16 16 Aruba ABW All education sta… SE.XPD.MTOT.ZS X1960 NA
## 17 17 Aruba ABW All education sta… SE.XPD.MTER.ZS X1960 NA
## 18 18 Aruba ABW All education sta… SE.XPD.MSEC.ZS X1960 NA
## 19 19 Aruba ABW All education sta… SE.XPD.MPRM.ZS X1960 NA
## 20 20 Aruba ABW Current education… SE.XPD.CTOT.ZS X1960 NA
# Keep only the total education expenditure series (% of GDP) from the long
# table.
Gov.exp <- filter(
  long_year_data,
  Indicator.Name == "Government expenditure on education, total (% of GDP)"
)
# Year labels look like "X1960": take characters 2 through 5 and convert
# them to a number.
Gov.exp[["year"]] <- as.numeric(substr(Gov.exp[["year"]], 2, 5))
head(Gov.exp, 20)
## # A tibble: 20 × 7
## ...1 Country.Name Country.Code Indicator.Name Indicator.Code year value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 8 Aruba ABW Government ex… SE.XPD.TOTL.G… 1960 NA
## 2 166 Afghanistan AFG Government ex… SE.XPD.TOTL.G… 1960 NA
## 3 324 Angola AGO Government ex… SE.XPD.TOTL.G… 1960 NA
## 4 482 Albania ALB Government ex… SE.XPD.TOTL.G… 1960 NA
## 5 640 Andorra AND Government ex… SE.XPD.TOTL.G… 1960 NA
## 6 798 Arab World ARB Government ex… SE.XPD.TOTL.G… 1960 NA
## 7 956 United Arab Emi… ARE Government ex… SE.XPD.TOTL.G… 1960 NA
## 8 1114 Argentina ARG Government ex… SE.XPD.TOTL.G… 1960 NA
## 9 1272 Armenia ARM Government ex… SE.XPD.TOTL.G… 1960 NA
## 10 1430 American Samoa ASM Government ex… SE.XPD.TOTL.G… 1960 NA
## 11 1588 Antigua and Bar… ATG Government ex… SE.XPD.TOTL.G… 1960 NA
## 12 1746 Australia AUS Government ex… SE.XPD.TOTL.G… 1960 NA
## 13 1904 Austria AUT Government ex… SE.XPD.TOTL.G… 1960 NA
## 14 2062 Azerbaijan AZE Government ex… SE.XPD.TOTL.G… 1960 NA
## 15 2220 Burundi BDI Government ex… SE.XPD.TOTL.G… 1960 NA
## 16 2378 Belgium BEL Government ex… SE.XPD.TOTL.G… 1960 NA
## 17 2536 Benin BEN Government ex… SE.XPD.TOTL.G… 1960 NA
## 18 2694 Burkina Faso BFA Government ex… SE.XPD.TOTL.G… 1960 NA
## 19 2852 Bangladesh BGD Government ex… SE.XPD.TOTL.G… 1960 NA
## 20 3010 Bulgaria BGR Government ex… SE.XPD.TOTL.G… 1960 NA
# Restrict the expenditure series to the two countries being compared.
ChinaUSA <- Gov.exp %>%
  filter(Country.Name %in% c("China", "United States"))
ChinaUSA
## # A tibble: 118 × 7
## ...1 Country.Name Country.Code Indicator.Name Indicator.Code year value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 6012 China CHN Government expen… SE.XPD.TOTL.G… 1960 NA
## 2 39350 United States USA Government expen… SE.XPD.TOTL.G… 1960 NA
## 3 6012 China CHN Government expen… SE.XPD.TOTL.G… 1961 NA
## 4 39350 United States USA Government expen… SE.XPD.TOTL.G… 1961 NA
## 5 6012 China CHN Government expen… SE.XPD.TOTL.G… 1962 NA
## 6 39350 United States USA Government expen… SE.XPD.TOTL.G… 1962 NA
## 7 6012 China CHN Government expen… SE.XPD.TOTL.G… 1963 NA
## 8 39350 United States USA Government expen… SE.XPD.TOTL.G… 1963 NA
## 9 6012 China CHN Government expen… SE.XPD.TOTL.G… 1964 NA
## 10 39350 United States USA Government expen… SE.XPD.TOTL.G… 1964 NA
## # ℹ 108 more rows
# Line chart of value against year, one coloured line per country.
ggplot(
  data = ChinaUSA,
  mapping = aes(x = year, y = value, colour = Country.Name)
) +
  geom_line()
## Warning: Removed 60 rows containing missing values (`geom_line()`).
Based on the line plot above, we can see that the US spends a considerably larger share of its GDP on education than China. We can also see that spending fluctuates from year to year. It appears that China's data are tracked very well up to 2000 and the US's data after 2000. I wonder why this is so.
# Adult literacy rate for 2014 (adults only, because it is compared with
# employment figures).
literacy <- long_year_data %>%
  filter(
    Indicator.Name == "Literacy rate, adult total (% of people ages 15 and above)",
    year == "X2014"
  )
# Total unemployment rate (modeled ILO estimate) for 2014.
unemploy <- long_year_data %>%
  filter(
    Indicator.Name == "Unemployment, total (% of total labor force) (modeled ILO estimate)",
    year == "X2014"
  )
head(literacy, 20)
## # A tibble: 20 × 7
## ...1 Country.Name Country.Code Indicator.Name Indicator.Code year value
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 152 Aruba ABW Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 2 310 Afghanistan AFG Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 3 468 Angola AGO Literacy rate… SE.ADT.LITR.ZS X2014 66.0
## 4 626 Albania ALB Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 5 784 Andorra AND Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 6 942 Arab World ARB Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 7 1100 United Arab Emi… ARE Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 8 1258 Argentina ARG Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 9 1416 Armenia ARM Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 10 1574 American Samoa ASM Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 11 1732 Antigua and Bar… ATG Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 12 1890 Australia AUS Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 13 2048 Austria AUT Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 14 2206 Azerbaijan AZE Literacy rate… SE.ADT.LITR.ZS X2014 99.8
## 15 2364 Burundi BDI Literacy rate… SE.ADT.LITR.ZS X2014 61.6
## 16 2522 Belgium BEL Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 17 2680 Benin BEN Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 18 2838 Burkina Faso BFA Literacy rate… SE.ADT.LITR.ZS X2014 34.6
## 19 2996 Bangladesh BGD Literacy rate… SE.ADT.LITR.ZS X2014 NA
## 20 3154 Bulgaria BGR Literacy rate… SE.ADT.LITR.ZS X2014 NA
# Preview the first 20 rows of the unemployment table.
head(unemploy, n = 20)
## # A tibble: 20 × 7
## ...1 Country.Name Country.Code Indicator.Name Indicator.Code year value
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 3 Aruba ABW Unemployment,… SL.UEM.TOTL.ZS X2014 NA
## 2 161 Afghanistan AFG Unemployment,… SL.UEM.TOTL.ZS X2014 8.60
## 3 319 Angola AGO Unemployment,… SL.UEM.TOTL.ZS X2014 6.20
## 4 477 Albania ALB Unemployment,… SL.UEM.TOTL.ZS X2014 17.5
## 5 635 Andorra AND Unemployment,… SL.UEM.TOTL.ZS X2014 NA
## 6 793 Arab World ARB Unemployment,… SL.UEM.TOTL.ZS X2014 11.4
## 7 951 United Arab Emi… ARE Unemployment,… SL.UEM.TOTL.ZS X2014 4
## 8 1109 Argentina ARG Unemployment,… SL.UEM.TOTL.ZS X2014 7.30
## 9 1267 Armenia ARM Unemployment,… SL.UEM.TOTL.ZS X2014 17.6
## 10 1425 American Samoa ASM Unemployment,… SL.UEM.TOTL.ZS X2014 NA
## 11 1583 Antigua and Bar… ATG Unemployment,… SL.UEM.TOTL.ZS X2014 NA
## 12 1741 Australia AUS Unemployment,… SL.UEM.TOTL.ZS X2014 6.10
## 13 1899 Austria AUT Unemployment,… SL.UEM.TOTL.ZS X2014 5.60
## 14 2057 Azerbaijan AZE Unemployment,… SL.UEM.TOTL.ZS X2014 4.90
## 15 2215 Burundi BDI Unemployment,… SL.UEM.TOTL.ZS X2014 1.60
## 16 2373 Belgium BEL Unemployment,… SL.UEM.TOTL.ZS X2014 8.5
## 17 2531 Benin BEN Unemployment,… SL.UEM.TOTL.ZS X2014 1
## 18 2689 Burkina Faso BFA Unemployment,… SL.UEM.TOTL.ZS X2014 3.30
## 19 2847 Bangladesh BGD Unemployment,… SL.UEM.TOTL.ZS X2014 4.20
## 20 3005 Bulgaria BGR Unemployment,… SL.UEM.TOTL.ZS X2014 11.4
# Scatter plot of literacy (x) against unemployment (y), paired by row.
# The two tables are filtered independently, so verify that row i refers to
# the same country in both before pairing their values by position.
stopifnot(identical(literacy$Country.Code, unemploy$Country.Code))
plot(literacy$value, unemploy$value)
You would think that higher literacy rates would go along with lower unemployment rates. This seems to be the case for a lot of the data, but the highest unemployment rates are also found where the literacy rate is high. Also, where the literacy rate is really low, the unemployment rate seems to be really low too. It would be interesting to see whether the missing data would support further conclusions.