# Import the data
# Read the GDP growth export from the working directory into a tibble.
gdp_yoy <- read_csv("new_data.csv")
## Rows: 6 Columns: 54
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country Name, Country Code, Series Name, Series Code
## dbl (50): 1971 [YR1971], 1972 [YR1972], 1973 [YR1973], 1974 [YR1974], 1975 [...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename the columns
# read_csv() reports the identifier columns in the order
# Country Name, Country Code, Series Name, Series Code. The labels must follow
# that same order, otherwise the country ends up under "Series Name" and the
# indicator under "Country Name". The year columns drop their " [YRxxxx]"
# suffix and keep only the four-digit year.
names(gdp_yoy) <- c(
  "Country Name", "Country Code", "Series Name", "Series Code",
  as.character(1971:2020)
)
# Tidy the data
# Reshape from one column per year to one row per (series, year).
# pivot_longer() replaces the superseded gather().
tidy_gdp_yoy <-
  gdp_yoy %>%
  pivot_longer(
    cols = `1971`:`2020`,
    names_to = "year",
    values_to = "gdp"
  )
# Year labels come out of the reshape as text; make both columns numeric.
tidy_gdp_yoy$gdp <- as.numeric(tidy_gdp_yoy$gdp)
tidy_gdp_yoy$year <- as.numeric(tidy_gdp_yoy$year)
# Drop every row that has a missing value in any column.
tidy_gdp_yoy <- na.omit(tidy_gdp_yoy)
# View the data
# NOTE: the captured console output that follows was recorded before the
# identifier labels were corrected, so its header line still shows the old
# label order; the row contents are unaffected.
print(tidy_gdp_yoy)
## # A tibble: 50 × 6
## `Series Name` `Series Code` `Country Name` `Country Code` year gdp
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Saudi Arabia SAU GDP growth (annual %) NY.GDP.MKTP.KD… 1971 20.5
## 2 Saudi Arabia SAU GDP growth (annual %) NY.GDP.MKTP.KD… 1972 22.9
## 3 Saudi Arabia SAU GDP growth (annual %) NY.GDP.MKTP.KD… 1973 24.2
## 4 Saudi Arabia SAU GDP growth (annual %) NY.GDP.MKTP.KD… 1974 16.2
## 5 Saudi Arabia SAU GDP growth (annual %) NY.GDP.MKTP.KD… 1975 -8.93
## 6 Saudi Arabia SAU GDP growth (annual %) NY.GDP.MKTP.KD… 1976 17.8
## 7 Saudi Arabia SAU GDP growth (annual %) NY.GDP.MKTP.KD… 1977 7.09
## 8 Saudi Arabia SAU GDP growth (annual %) NY.GDP.MKTP.KD… 1978 -5.22
## 9 Saudi Arabia SAU GDP growth (annual %) NY.GDP.MKTP.KD… 1979 11.9
## 10 Saudi Arabia SAU GDP growth (annual %) NY.GDP.MKTP.KD… 1980 5.65
## # … with 40 more rows
# create data
# Copy year and growth into a plain two-column data frame for plotting.
xValue <- tidy_gdp_yoy[["year"]]
yValue <- tidy_gdp_yoy[["gdp"]]
data <- data.frame(xValue = xValue, yValue = yValue)
# Plot Saudi Arabia Real GDP growth 1971-2020 (annual %)
# Filled area with small point markers on top, drawn on a wide 6:20 panel.
ggplot(data, aes(x = xValue, y = yValue)) +
  geom_area(fill = "#69b3a2") +
  geom_point(size = .2, color = "#69b3a2") +
  labs(
    title = "Saudi Arabia Real GDP growth 1971-2020 (annual %)",
    x = "Year",
    y = "Percent (%)"
  ) +
  # labs(caption = "Data source: https://data.worldbank.org/")
  theme(
    axis.text.x = element_text(angle = 0),
    aspect.ratio = 6 / 20
  )
# Import the data
# Read the unemployment export from the working directory into a tibble.
unemployment_saudi <- read_csv("saudiunemployment.csv")
## Rows: 9 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (19): Country Name, Country Code, Series Name, Series Code, 1990 [YR1990...
## dbl (16): 1999 [YR1999], 2000 [YR2000], 2001 [YR2001], 2002 [YR2002], 2006 [...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename the columns
# read_csv() reports the identifier columns in the order
# Country Name, Country Code, Series Name, Series Code, so the labels follow
# that order. The third column (the series name, e.g. "Unemployment,
# female ...") keeps the label "Unemployment Group" because the plot groups
# and colours by it. The year columns keep only the four-digit year.
names(unemployment_saudi) <- c(
  "Country Name", "Country Code", "Unemployment Group", "Series Code",
  as.character(1990:2020)
)
# Tidy the data
# Reshape to one row per (series, year). Some year columns were read as text
# and others as numbers, so every value is converted to text first; without
# that, pivot_longer() refuses to combine the two types.
tidy_unemployment_saudi <-
  unemployment_saudi %>%
  pivot_longer(
    cols = `1990`:`2020`,
    names_to = "year",
    values_to = "unemployment",
    values_transform = list(unemployment = as.character)
  )
# Convert in place. Cells that do not parse as a number become NA (R emits a
# coercion warning) and are removed by na.omit() below. Writing back to the
# same column avoids leaving a stray text copy of the values in the result.
tidy_unemployment_saudi$unemployment <-
  as.numeric(tidy_unemployment_saudi$unemployment)
tidy_unemployment_saudi$year <- as.numeric(tidy_unemployment_saudi$year)
# Drop every row that has a missing value in any column.
tidy_unemployment_saudi <- na.omit(tidy_unemployment_saudi)
# View the data
# NOTE: the captured console output that follows was recorded before this
# cleanup: it shows the old identifier labels, the misspelled extra column
# `unemployement`, and rows ordered by year rather than by series.
print(tidy_unemployment_saudi)
## # A tibble: 76 × 7
## `Series Name` `Series Code` `Unemployment Group` `Country Code` year
## <chr> <chr> <chr> <chr> <dbl>
## 1 Saudi Arabia SAU Unemployment, female (% of… SL.UEM.TOTL.FE… 1992
## 2 Saudi Arabia SAU Unemployment, male (% of m… SL.UEM.TOTL.MA… 1992
## 3 Saudi Arabia SAU Unemployment, female (% of… SL.UEM.TOTL.FE… 1999
## 4 Saudi Arabia SAU Unemployment, male (% of m… SL.UEM.TOTL.MA… 1999
## 5 Saudi Arabia SAU Unemployment, youth female… SL.UEM.1524.FE… 1999
## 6 Saudi Arabia SAU Unemployment, youth male (… SL.UEM.1524.MA… 1999
## 7 Saudi Arabia SAU Unemployment, female (% of… SL.UEM.TOTL.FE… 2000
## 8 Saudi Arabia SAU Unemployment, male (% of m… SL.UEM.TOTL.MA… 2000
## 9 Saudi Arabia SAU Unemployment, youth female… SL.UEM.1524.FE… 2000
## 10 Saudi Arabia SAU Unemployment, youth male (… SL.UEM.1524.MA… 2000
## # … with 66 more rows, and 2 more variables: unemployement <chr>,
## # unemployment <dbl>
# Use ggplot2's functions to visualize insights.
# Plot Saudi Arabia Unemployment % by Gender (1990 - 2020)
# One line per unemployment series; the legend sits below the panel with its
# entries stacked vertically.
ggplot(
  tidy_unemployment_saudi,
  aes(
    x = year,
    y = unemployment,
    group = `Unemployment Group`,
    color = `Unemployment Group`
  )
) +
  geom_line() +
  theme(legend.position = "bottom", legend.direction = "vertical") +
  labs(
    title = "Saudi Arabia Unemployment % by Gender (1990 - 2020)",
    x = "Year",
    y = "Percent (%)"
  )
# Import the data
# Read the CPI / GDP-deflator export from the working directory.
# NOTE(review): `test` is not referenced by the code that follows, which
# types the figures in by hand -- confirm whether this import is still needed.
test <- read_csv("cpivsinflation.csv")
## Rows: 7 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country Name, Country Code, Series Name, Series Code
## dbl (11): 2004 [YR2004], 2005 [YR2005], 2006 [YR2006], 2007 [YR2007], 2008 [...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Hand-entered inflation figures for 2004-2020, two values per year.
# Each year label appears twice, once per series.
year <- rep(as.character(2004:2020), each = 2)
# Values are listed year by year; within each pair the first number belongs
# to the first entry of `legend` and the second number to the second entry.
percent <- c(
  0.51, 11,
  0.47, 20.1,
  2.2, 11.5,
  4.1, 8.4,
  9.8, 17.6,
  5.05, -15.7,
  5.3, 17.19,
  5.82, 15.52,
  2.86, 4.01,
  3.5, -1.2,
  2.2, -2.2,
  1.2, -16.9,
  2.06, -3.04,
  -0.83, 7.5,
  2.4, 11.5,
  -2.09, 0.48,
  3.44, -7.92
)
# Only one copy of the two series labels is stored; data.frame() recycles
# this length-2 vector down the 34 rows, alternating the labels.
legend <- c("Inflation, CPI %", "Inflation, GDP Deflator %")
data2 <- data.frame(year = year, legend = legend, percent = percent)
library(ggplot2)
# Side-by-side bars per year, one bar per series, with the year labels
# rotated 45 degrees.
ggplot(data2, aes(x = year, y = percent, fill = legend)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 45))
# Import the data
# Read the crude oil price series from the working directory into a tibble.
oil <- read_csv("crude-oil-prices_new.csv")
## Rows: 50 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, Oil - Crude prices since 1861 (current $)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the GDP-in-dollars export from the working directory into a tibble.
gdpdollars <- read_csv("gdpdollars.csv")
## Rows: 6 Columns: 54
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country Name, Country Code, Series Name, Series Code
## dbl (50): 1971 [YR1971], 1972 [YR1972], 1973 [YR1973], 1974 [YR1974], 1975 [...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename the columns
# The oil file arrives as Entity, Code, Year and the crude price. It is given
# the same column names as the tidy GDP table (the price is stored under
# `gdp`) so that the two tables can later be stacked with rbind().
names(oil) <- c("Country Code", "Series Code", "year", "gdp")
# Rename the columns
# read_csv() reports the identifier columns in the order
# Country Name, Country Code, Series Name, Series Code. The labels must follow
# that same order, otherwise the country ends up under "Series Name" and the
# indicator under "Country Name". The year columns keep only the four-digit
# year.
names(gdpdollars) <- c(
  "Country Name", "Country Code", "Series Name", "Series Code",
  as.character(1971:2020)
)
# Tidy the data
# Reshape from one column per year to one row per (series, year).
# pivot_longer() replaces the superseded gather().
tidy_gdpdollars <-
  gdpdollars %>%
  pivot_longer(
    cols = `1971`:`2020`,
    names_to = "year",
    values_to = "gdp"
  )
# Year labels come out of the reshape as text; make both columns numeric.
tidy_gdpdollars$gdp <- as.numeric(tidy_gdpdollars$gdp)
tidy_gdpdollars$year <- as.numeric(tidy_gdpdollars$year)
# Drop every row that has a missing value in any column.
tidy_gdpdollars <- na.omit(tidy_gdpdollars)
# Fill in the two remaining identifier columns on the oil table so that it
# has the same set of column names as tidy_gdpdollars.
oil$gdp <- as.numeric(oil$gdp)
oil$`Country Name` <- "Oil Price"
oil$`Series Name` <- "Oil"
# Oil prices as a bare numeric vector with missing values removed.
# NOTE: the captured printouts of tidy_gdpdollars and df further down were
# recorded before the identifier labels were corrected, so their header lines
# still show the old label order.
Oil <- na.omit(oil$gdp)
print(Oil)
## [1] 2.24000 2.48000 3.29000 11.58000 11.53000 12.80000 13.92000
## [8] 14.02000 31.61000 36.83000 35.93000 32.97000 29.55000 28.78000
## [15] 27.56000 14.43000 18.43504 14.92384 18.22611 23.72582 20.00091
## [22] 19.32084 16.97163 15.81763 17.01668 20.66849 19.09259 12.71566
## [29] 17.97008 28.49545 24.44389 25.02326 28.83070 38.26500 54.52109
## [36] 65.14406 72.38908 97.25597 61.67126 79.49553 111.25560 111.66970
## [43] 108.65852 98.94601 52.38676 43.73417 54.19244 71.31006 64.21057
## [50] 41.83835
# GDP values as a bare numeric vector with missing values removed.
GDP <- na.omit(tidy_gdpdollars[["gdp"]])
print(GDP)
## [1] 7184863277 9664067930 14947491056 45412971909 46773303721
## [6] 64005623870 74188363506 80265732293 111859726429 164541658299
## [11] 184291888767 153238991869 129171602025 119624918956 103897892811
## [16] 86962013011 85695941760 88256162069 95344352824 117630271802
## [21] 132223268491 137087876662 132967901415 135174886489 143343036342
## [26] 158662398745 165963557410 146775498080 161716960000 189514926213
## [31] 184137469733 189605920240 215807655253 258742263040 328459700115
## [36] 376900135728 415964583055 519796738640 429097899280 528207332640
## [41] 671238840107 735974843360 746647127413 756350347333 654269902880
## [46] 644935541440 688586244293 786521831573 792966838160 700117873253
# Correlation between the oil price and GDP.
# The two series are paired by year instead of by position: pairing the
# separately NA-stripped vectors only works while both cover exactly the same
# years in the same order, and a single missing value on either side would
# shift the pairs (or fail on unequal lengths). Both tables store their value
# in a column called `gdp`, so merge() suffixes them to tell them apart.
oil_vs_gdp <- merge(
  oil[, c("year", "gdp")],
  tidy_gdpdollars[, c("year", "gdp")],
  by = "year",
  suffixes = c("_oil", "_gdp")
)
print(cor(oil_vs_gdp$gdp_oil, oil_vs_gdp$gdp_gdp, use = "complete.obs"))
## [1] 0.8649575
# Side-by-side frame of oil price, GDP and year. The third column keeps the
# name data.frame() derives from the expression, `tidy_gdpdollars.year`.
df <- data.frame(
  Oil = Oil,
  GDP = GDP,
  tidy_gdpdollars.year = tidy_gdpdollars$year
)
# Show the tidy GDP table.
print(tidy_gdpdollars)
## # A tibble: 50 × 6
## `Series Name` `Series Code` `Country Name` `Country Code` year gdp
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1971 7.18e 9
## 2 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1972 9.66e 9
## 3 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1973 1.49e10
## 4 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1974 4.54e10
## 5 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1975 4.68e10
## 6 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1976 6.40e10
## 7 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1977 7.42e10
## 8 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1978 8.03e10
## 9 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1979 1.12e11
## 10 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1980 1.65e11
## # … with 40 more rows
# Show the oil table after relabeling and the two added identifier columns.
print(oil)
## # A tibble: 50 × 6
## `Country Code` `Series Code` year gdp `Country Name` `Series Name`
## <chr> <chr> <dbl> <dbl> <chr> <chr>
## 1 World OWID_WRL 1971 2.24 Oil Price Oil
## 2 World OWID_WRL 1972 2.48 Oil Price Oil
## 3 World OWID_WRL 1973 3.29 Oil Price Oil
## 4 World OWID_WRL 1974 11.6 Oil Price Oil
## 5 World OWID_WRL 1975 11.5 Oil Price Oil
## 6 World OWID_WRL 1976 12.8 Oil Price Oil
## 7 World OWID_WRL 1977 13.9 Oil Price Oil
## 8 World OWID_WRL 1978 14.0 Oil Price Oil
## 9 World OWID_WRL 1979 31.6 Oil Price Oil
## 10 World OWID_WRL 1980 36.8 Oil Price Oil
## # … with 40 more rows
# Stack the GDP rows and the oil rows into one long table. rbind() matches
# data-frame columns by name, so the differing column order does not matter.
# This replaces the earlier value of `df`.
df <- rbind(
  tidy_gdpdollars,
  oil
)
print(df)
## # A tibble: 100 × 6
## `Series Name` `Series Code` `Country Name` `Country Code` year gdp
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1971 7.18e 9
## 2 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1972 9.66e 9
## 3 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1973 1.49e10
## 4 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1974 4.54e10
## 5 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1975 4.68e10
## 6 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1976 6.40e10
## 7 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1977 7.42e10
## 8 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1978 8.03e10
## 9 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1979 1.12e11
## 10 Saudi Arabia SAU GDP (current US$) NY.GDP.MKTP.CD 1980 1.65e11
## # … with 90 more rows
# Line plot of Saudi Arabia GDP by year.
# The plotted series is "GDP (current US$)", i.e. nominal rather than
# inflation-adjusted GDP, so the title says "current US $" instead of "Real".
ggplot(tidy_gdpdollars, aes(x = year, y = gdp)) +
  geom_line() +
  theme(legend.position = "none") +
  labs(
    title = "Saudi Arabia GDP in current US $ (1971 - 2020)",
    x = "Year",
    y = "US Dollar ($)"
  )
# Line plot of the crude oil price by year (the price is stored in `gdp`).
ggplot(oil, aes(x = year, y = gdp)) +
  geom_line() +
  theme(legend.position = "none") +
  labs(
    title = "Oil Prices in US $ (1971 - 2020)",
    x = "Year",
    y = "US Dollar ($)"
  )
# Additional visual for GDP
# Use ggplot2's functions to visualize insights.
# Line plot of annual GDP growth, with the line coloured by the growth value
# itself and the year labels rotated to vertical.
ggplot(tidy_gdp_yoy, aes(x = year, y = gdp, color = gdp)) +
  geom_line() +
  labs(
    title = "Real GDP growth (annual %)",
    x = "Year",
    y = "%"
  ) +
  theme(axis.text.x = element_text(angle = 90))
#Data Source -> world bank (https://databank.worldbank.org/source/world-development-indicators#)