library(readr)

# Directory holding the three input CSV files. Paths are built from it with
# file.path() instead of calling setwd(), so the session's working directory
# is left untouched. The location is machine-specific: edit this one line to
# run the script elsewhere.
data_dir <- path.expand("~/NYU/classes/2. R/Assignments/Lesson 5")

# Each file is read with readr's default column-type guessing. The `##` lines
# after each call are the console output recorded when the script was run:
# two character columns (`Country Name`, `Country Code`) followed by one
# column per year.
gdp <- read_csv(file.path(data_dir, "gdp.csv"))
## Rows: 264 Columns: 60
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (2): Country Name, Country Code
## dbl (58): 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, ...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# NOTE(review): `2017` is guessed as logical below -- presumably because the
# column holds no values in this file; confirm before using that year.
life_expectancy <- read_csv(file.path(data_dir, "life_expectancy.csv"))
## Rows: 264 Columns: 60
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (2): Country Name, Country Code
## dbl (57): 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, ...
## lgl  (1): 2017
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# NOTE(review): 31 year columns starting at 1960 are guessed as logical below --
# presumably empty in this file; only the 27 columns starting at 1991 are
# numeric. Confirm before using the earlier years.
employment <- read_csv(file.path(data_dir, "employment.csv"))
## Rows: 264 Columns: 60
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (2): Country Name, Country Code
## dbl (27): 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, ...
## lgl (31): 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, ...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Create a new data frame.
# Merge the columns for the year 2016 for GDP, Life Expectancy, and Employment
# into a new data frame and clean up the new table.

# Align the life-expectancy and employment rows to the GDP rows by country code
# instead of relying on all three files listing the countries in the same
# order. A code that is absent from a file yields NA for that country.
life_rows <- match(gdp$`Country Code`, life_expectancy$`Country Code`)
employment_rows <- match(gdp$`Country Code`, employment$`Country Code`)

# Task #1: name the columns "country", "gdp", "life_expectancy", and
# "employment" (set directly when the data frame is built).
countries_2016 <- data.frame(
  country = gdp$`Country Name`,
  gdp = gdp$`2016`,
  life_expectancy = life_expectancy$`2016`[life_rows],
  employment = employment$`2016`[employment_rows]
)
names(countries_2016)
## [1] "country"         "gdp"             "life_expectancy" "employment"
# Task #2: Convert the employment figure from a percentage to a proportion by
# dividing by 100.
countries_2016$employment <- countries_2016$employment / 100
# Task #3: Round life expectancy to zero decimals and employment to two decimals
countries_2016$life_expectancy <- round(
  countries_2016$life_expectancy,
  digits = 0
)
countries_2016$employment <- round(countries_2016$employment, digits = 2)
# Task #4: Create a frequency table for each variable.
# Only life_expectancy and employment are tabulated; the remaining columns are
# purposely excluded.
table(countries_2016[["life_expectancy"]])
## 
## 52 53 54 56 57 58 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 
##  2  2  2  1  3  5 10  3  5  8  6  5  6  8  3 11  9 11 10 14 11 20 18 16  7  3 
## 80 81 82 83 84 
## 10 12 15  6  3
table(countries_2016[["employment"]])
## 
## 0.29 0.33 0.35 0.37 0.38 0.39  0.4 0.41 0.42 0.43 0.44 0.45 0.46 0.47 0.48 0.49 
##    1    3    2    2    1    1    6    5    2    5    4    2    2    2    2    6 
##  0.5 0.51 0.52 0.53 0.54 0.55 0.56 0.57 0.58 0.59  0.6 0.61 0.62 0.63 0.64 0.65 
##    3    4    6   10   11   13    9    6   11   17   10   10    6    7    8    7 
## 0.66 0.67 0.68 0.69  0.7 0.71 0.72 0.73 0.74 0.75 0.76 0.77 0.78 0.79 0.81 0.82 
##    7    7    6    4    2    1    2    1    1    2    2    2    3    3    1    1 
## 0.84 0.85 0.87 
##    1    2    1
# Task #5: Draw histograms for each variable.
# Every plot requests roughly 10 bins, prints the count above each bar
# (labels = TRUE) and draws white bar borders.
hist(
  countries_2016$gdp,
  breaks = 10,
  main = "GDP by country in 2016",
  xlab = "GDP",
  col = "Blue",
  labels = TRUE,
  border = "#FFFFFF"
)

hist(
  countries_2016$life_expectancy,
  breaks = 10,
  main = "Life Expectancy by country in 2016",
  xlab = "Life Expectancy",
  col = "Orange",
  labels = TRUE,
  border = "#FFFFFF"
)

# The employment column was divided by 100 earlier in the script, so the axis
# shows proportions (0-1), not percentages; the label says so.
hist(
  countries_2016$employment,
  breaks = 10,
  main = "Employment by country in 2016",
  xlab = "Employment (proportion)",
  col = "Red",
  labels = TRUE,
  border = "#FFFFFF"
)