# Load the three indicator tables (GDP, life expectancy, employment).
# NOTE(review): setwd() ties the script to one machine's directory layout. It
# is kept so that any relative paths used later keep resolving; an RStudio
# project or a path built with file.path() would be more portable.
setwd("~/NYU/classes/2. R/Assignments/Lesson 5")
library(readr)

# Shared, explicit column specification. Without it readr guesses the types,
# and year columns that are entirely empty (e.g. `2017` in life_expectancy.csv,
# the early years in employment.csv) are read as logical instead of numeric.
# Declaring every non-identifier column as double keeps the three tables
# consistent and also silences the column-specification message.
indicator_col_types <- cols(
  `Country Name` = col_character(),
  `Country Code` = col_character(),
  .default = col_double()
)

# Each file has 264 rows and 60 columns: the two character identifier columns
# above plus one value column per year (1960 onward).
gdp <- read_csv("gdp.csv", col_types = indicator_col_types)
life_expectancy <- read_csv(
  "life_expectancy.csv",
  col_types = indicator_col_types
)
employment <- read_csv("employment.csv", col_types = indicator_col_types)
# Create a new data frame.
# Merge the 2016 columns for GDP, life expectancy, and employment into one
# table and clean it up.
#
# Rows are aligned on `Country Code` rather than on row position, so the result
# stays correct even if the three files list countries in a different order.
# Row order follows `gdp`; a country missing from another table gets NA there.
life_rows <- match(gdp$`Country Code`, life_expectancy$`Country Code`)
employment_rows <- match(gdp$`Country Code`, employment$`Country Code`)

# Task #1: name the columns "country", "gdp", "life_expectancy", and
# "employment" (assigned directly while constructing the data frame).
countries_2016 <- data.frame(
  country = gdp$`Country Name`,
  gdp = gdp$`2016`,
  life_expectancy = life_expectancy$`2016`[life_rows],
  employment = employment$`2016`[employment_rows]
)
names(countries_2016)
## [1] "country" "gdp" "life_expectancy" "employment"
# Task #2: Convert the employment number to percentages by dividing by 100
countries_2016$employment <- countries_2016$employment / 100
# Task #3: Round life expectancy to zero decimals and employment to two decimals
countries_2016$life_expectancy <- round(
  countries_2016$life_expectancy,
  digits = 0
)
countries_2016$employment <- round(countries_2016$employment, digits = 2)
# Task #4: Create a frequency table for each variable (GDP purposely excluded).
# table() counts how often each distinct value occurs; the pasted output below
# shows the value on the first row of each pair and its count underneath.
table(countries_2016[["life_expectancy"]])
##
## 52 53 54 56 57 58 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
## 2 2 2 1 3 5 10 3 5 8 6 5 6 8 3 11 9 11 10 14 11 20 18 16 7 3
## 80 81 82 83 84
## 10 12 15 6 3
table(countries_2016[["employment"]])
##
## 0.29 0.33 0.35 0.37 0.38 0.39 0.4 0.41 0.42 0.43 0.44 0.45 0.46 0.47 0.48 0.49
## 1 3 2 2 1 1 6 5 2 5 4 2 2 2 2 6
## 0.5 0.51 0.52 0.53 0.54 0.55 0.56 0.57 0.58 0.59 0.6 0.61 0.62 0.63 0.64 0.65
## 3 4 6 10 11 13 9 6 11 17 10 10 6 7 8 7
## 0.66 0.67 0.68 0.69 0.7 0.71 0.72 0.73 0.74 0.75 0.76 0.77 0.78 0.79 0.81 0.82
## 7 7 6 4 2 1 2 1 1 2 2 2 3 3 1 1
## 0.84 0.85 0.87
## 1 2 1
# Task #5: Draw one histogram per variable.
# All three plots share the same look: about 10 suggested bins, bar counts
# printed above the bars (labels = TRUE), and white bar borders.
hist(
  countries_2016[["gdp"]],
  main = "GDP by country in 2016",
  xlab = "GDP",
  breaks = 10,
  labels = TRUE,
  col = "Blue",
  border = "#FFFFFF"
)

hist(
  countries_2016[["life_expectancy"]],
  main = "Life Expectancy by country in 2016",
  xlab = "Life Expectancy",
  breaks = 10,
  labels = TRUE,
  col = "Orange",
  border = "#FFFFFF"
)

hist(
  countries_2016[["employment"]],
  main = "Employment by country in 2016",
  xlab = "Employment (%)",
  breaks = 10,
  labels = TRUE,
  col = "Red",
  border = "#FFFFFF"
)
