library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(dplyr)
library(ggplot2)
# Load the country-level statistics from the CSV file (relative path).
countrystat <- read_csv(file = "country_stat.csv")
## Rows: 10545 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): country, continent, region
## dbl (6): year, infant_mortality, life_expectancy, fertility, population, gdp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Question 1:

In order to find how many missing values are in this data set, I am going to use sum(is.na()). Then, to show which columns the missing values are in, I am going to use colSums(is.na()).

# Total number of missing cells across the whole data set.
countrystat %>% is.na() %>% sum()
## [1] 4797
# Number of missing cells in each column.
countrystat %>% is.na() %>% colSums()
##          country             year infant_mortality  life_expectancy 
##                0                0             1453                0 
##        fertility       population              gdp        continent 
##              187              185             2972                0 
##           region 
##                0

Answer:

There are 4797 missing values in this data set. The missing values are found in infant mortality, fertility, population, and GDP. GDP has the most missing values at 2972, followed by infant mortality at 1453.

Question 2:

In order to find the total amount of unique countries included in the data I am going to use the unique() function. I will also use this function to find how many unique years are included.

# Distinct country names, in order of first appearance.
unique(countrystat[["country"]])
##   [1] "Albania"                        "Algeria"                       
##   [3] "Angola"                         "Antigua and Barbuda"           
##   [5] "Argentina"                      "Armenia"                       
##   [7] "Aruba"                          "Australia"                     
##   [9] "Austria"                        "Azerbaijan"                    
##  [11] "Bahamas"                        "Bahrain"                       
##  [13] "Bangladesh"                     "Barbados"                      
##  [15] "Belarus"                        "Belgium"                       
##  [17] "Belize"                         "Benin"                         
##  [19] "Bhutan"                         "Bolivia"                       
##  [21] "Bosnia and Herzegovina"         "Botswana"                      
##  [23] "Brazil"                         "Brunei"                        
##  [25] "Bulgaria"                       "Burkina Faso"                  
##  [27] "Burundi"                        "Cambodia"                      
##  [29] "Cameroon"                       "Canada"                        
##  [31] "Cape Verde"                     "Central African Republic"      
##  [33] "Chad"                           "Chile"                         
##  [35] "China"                          "Colombia"                      
##  [37] "Comoros"                        "Congo, Dem. Rep."              
##  [39] "Congo, Rep."                    "Costa Rica"                    
##  [41] "Cote d'Ivoire"                  "Croatia"                       
##  [43] "Cuba"                           "Cyprus"                        
##  [45] "Czech Republic"                 "Denmark"                       
##  [47] "Djibouti"                       "Dominican Republic"            
##  [49] "Ecuador"                        "Egypt"                         
##  [51] "El Salvador"                    "Equatorial Guinea"             
##  [53] "Eritrea"                        "Estonia"                       
##  [55] "Ethiopia"                       "Fiji"                          
##  [57] "Finland"                        "France"                        
##  [59] "French Polynesia"               "Gabon"                         
##  [61] "Gambia"                         "Georgia"                       
##  [63] "Germany"                        "Ghana"                         
##  [65] "Greece"                         "Greenland"                     
##  [67] "Grenada"                        "Guatemala"                     
##  [69] "Guinea"                         "Guinea-Bissau"                 
##  [71] "Guyana"                         "Haiti"                         
##  [73] "Honduras"                       "Hong Kong, China"              
##  [75] "Hungary"                        "Iceland"                       
##  [77] "India"                          "Indonesia"                     
##  [79] "Iran"                           "Iraq"                          
##  [81] "Ireland"                        "Israel"                        
##  [83] "Italy"                          "Jamaica"                       
##  [85] "Japan"                          "Jordan"                        
##  [87] "Kazakhstan"                     "Kenya"                         
##  [89] "Kiribati"                       "South Korea"                   
##  [91] "Kuwait"                         "Kyrgyz Republic"               
##  [93] "Lao"                            "Latvia"                        
##  [95] "Lebanon"                        "Lesotho"                       
##  [97] "Liberia"                        "Libya"                         
##  [99] "Lithuania"                      "Luxembourg"                    
## [101] "Macao, China"                   "Macedonia, FYR"                
## [103] "Madagascar"                     "Malawi"                        
## [105] "Malaysia"                       "Maldives"                      
## [107] "Mali"                           "Malta"                         
## [109] "Mauritania"                     "Mauritius"                     
## [111] "Mexico"                         "Micronesia, Fed. Sts."         
## [113] "Moldova"                        "Mongolia"                      
## [115] "Montenegro"                     "Morocco"                       
## [117] "Mozambique"                     "Namibia"                       
## [119] "Nepal"                          "Netherlands"                   
## [121] "New Caledonia"                  "New Zealand"                   
## [123] "Nicaragua"                      "Niger"                         
## [125] "Nigeria"                        "Norway"                        
## [127] "Oman"                           "Pakistan"                      
## [129] "Panama"                         "Papua New Guinea"              
## [131] "Paraguay"                       "Peru"                          
## [133] "Philippines"                    "Poland"                        
## [135] "Portugal"                       "Puerto Rico"                   
## [137] "Qatar"                          "Romania"                       
## [139] "Russia"                         "Rwanda"                        
## [141] "St. Lucia"                      "St. Vincent and the Grenadines"
## [143] "Samoa"                          "Saudi Arabia"                  
## [145] "Senegal"                        "Serbia"                        
## [147] "Seychelles"                     "Sierra Leone"                  
## [149] "Singapore"                      "Slovak Republic"               
## [151] "Slovenia"                       "Solomon Islands"               
## [153] "South Africa"                   "Spain"                         
## [155] "Sri Lanka"                      "Sudan"                         
## [157] "Suriname"                       "Swaziland"                     
## [159] "Sweden"                         "Switzerland"                   
## [161] "Syria"                          "Tajikistan"                    
## [163] "Tanzania"                       "Thailand"                      
## [165] "Timor-Leste"                    "Togo"                          
## [167] "Tonga"                          "Trinidad and Tobago"           
## [169] "Tunisia"                        "Turkey"                        
## [171] "Turkmenistan"                   "Uganda"                        
## [173] "Ukraine"                        "United Arab Emirates"          
## [175] "United Kingdom"                 "United States"                 
## [177] "Uruguay"                        "Uzbekistan"                    
## [179] "Vanuatu"                        "Venezuela"                     
## [181] "West Bank and Gaza"             "Vietnam"                       
## [183] "Yemen"                          "Zambia"                        
## [185] "Zimbabwe"
# Distinct years covered by the data set.
unique(countrystat[["year"]])
##  [1] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974
## [16] 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989
## [31] 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004
## [46] 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016

Answer:

There are 185 unique countries included in this data set. There are 57 years included in this data set.

Question 3:

To create this new variable, I am going to use the mutate function.

# Add GDP per capita (gdp divided by population) as a new column; rows where
# either input is NA get NA.
countrystat2 <- countrystat %>%
  mutate(GDP_per_capita = gdp / population)

Question 4:

1:

How does the infant mortality rate compare in Albania and France? Which country has a higher infant mortality rate? This is going to be my time series visualization. I am going to create a wider data frame with one row per year and one infant mortality column per country. Then, I will use geom_area() to graph this comparison.

# Reshape to one row per year with one infant-mortality column per country,
# then overlay Albania and France as semi-transparent areas.
country_wide <- pivot_wider(countrystat2, id_cols = year,
                            names_from = country,
                            values_from = infant_mortality)
ggplot(data = country_wide, aes(x = year)) +
  # position = "identity" draws each area from zero so the two series overlap
  # instead of going through the default stacking position.
  geom_area(aes(y = Albania, fill = "Albania"), alpha = 0.5,
            position = "identity") +
  geom_area(aes(y = France, fill = "France"), alpha = 0.5,
            position = "identity") +
  # Named values pin each colour to its country regardless of level order.
  scale_fill_manual(name = "Country",
                    values = c(Albania = "#FBAD01", France = "#FF0000")) +
  scale_x_continuous(expand = c(0, 0)) +
  # The plotted column is infant_mortality, so label the axis accordingly.
  ylab("Infant Mortality") + xlab("Year") +
  theme(legend.position = "top")
## Warning: Removed 1 rows containing missing values (position_stack).
## Removed 1 rows containing missing values (position_stack).

# Observation: Infant mortality is noticeably higher in Albania than it is in France. It stays higher in every year that is included in this data set.

2:

Which continent has the highest population?

# Total population per continent for the most recent year that has population
# data. geom_bar() only counts rows (country-year records) per continent, and
# mapping `fill = population` does not make bar height reflect population, so
# the totals are computed explicitly and drawn with geom_col().
# NOTE(review): countries with no population value in that year are left out
# of the totals; the written observation for this question was read from the
# earlier row-count chart and should be re-checked against this plot.
continent_population <- countrystat2 %>%
  filter(!is.na(population)) %>%
  filter(year == max(year)) %>%
  group_by(continent) %>%
  summarise(total_population = sum(population), .groups = "drop")

ggplot(continent_population, aes(x = continent, y = total_population)) +
  geom_col() +
  ggtitle("Total Population Per Continent") +
  labs(x = "Continents",
       y = "Total Population")

# Observation: This graph shows me that Africa is the continent with the highest population and Oceania is the continent with the smallest population.

3:

Which African region has the lowest life expectancy? What is the average life expectancy of that specific region?

# Keep every row that belongs to one of the five African regions.
# `%in%` tests set membership. Comparing with `==` against a length-5 vector
# recycles that vector along the rows, so a row is kept only when its region
# happens to equal the recycled element at its position, silently dropping
# most of the matching rows.
african_region_names <- c("Northern Africa", "Middle Africa", "Western Africa",
                          "Southern Africa", "Eastern Africa")
africanregions <- filter(countrystat2, region %in% african_region_names)

# One box plot of life expectancy per region, one facet each.
# NOTE(review): the written observation for this question was read from the
# plot of the partial subset; re-check the quoted values against this plot.
ggplot(africanregions, aes(x = life_expectancy, y = continent)) +
  geom_boxplot() +
  ggtitle("Life Expectancy in African Regions") +
  labs(x = "Life Expectancy",
       y = "Africa") +
  facet_wrap(~region)

# Observation: This graph shows me that the Middle Africa region has the lowest life expectancy: the high end of its range is just under 60 years and the low end is just below 40 years. The average (the median line of the box plot) in that region appears to be just under 50 years.

4:

Which region has the highest GDP per capita?

# GDP per capita for each region in the most recent year that has both GDP and
# population data, computed as total regional GDP divided by total regional
# population. Passing the raw rows to geom_col() stacks every country-year
# value, so bar length would grow with the number of countries and years in a
# region instead of showing its GDP per capita.
# NOTE(review): the written observation for this question was read from the
# earlier stacked chart; re-check it against this plot.
region_gdp <- countrystat2 %>%
  filter(!is.na(gdp), !is.na(population)) %>%
  filter(year == max(year)) %>%
  group_by(region) %>%
  summarise(GDP_per_capita = sum(gdp) / sum(population), .groups = "drop")

# Regions are ordered by value so the highest and lowest are easy to read off.
ggplot(region_gdp,
       aes(x = GDP_per_capita, y = reorder(region, GDP_per_capita))) +
  geom_col() +
  ggtitle("GDP Per Capita in each Region") +
  labs(x = "GDP Per Capita",
       y = "Regions")
## Warning: Removed 2972 rows containing missing values (position_stack).

# Observation: This graph shows me that Northern Europe is the region that has the highest GDP per capita and Central Asia has the lowest GDP per capita.