This analysis identifies the five countries with the highest and the five countries with the lowest average annual GDP growth. It also fits a linear model to predict the world's average GDP growth for a given year.
# Download the raw CSV; skip the four lines that precede the header row.
df <- read.csv(
  "https://raw.githubusercontent.com/nnaemeka-git/global-datasets/main/GDP%20dataset.csv",
  skip = 4
)
# Print a compact, column-by-column overview of the imported data.
glimpse(df)
## Rows: 266
## Columns: 65
## $ Country.Name <chr> "Aruba", "Africa Eastern and Southern", "Afghanistan", ~
## $ Country.Code <chr> "ABW", "AFE", "AFG", "AFW", "AGO", "ALB", "AND", "ARB",~
## $ Indicator.Name <chr> "GDP growth (annual %)", "GDP growth (annual %)", "GDP ~
## $ Indicator.Code <chr> "NY.GDP.MKTP.KD.ZG", "NY.GDP.MKTP.KD.ZG", "NY.GDP.MKTP.~
## $ X1960 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ X1961 <dbl> NA, 1.063696, NA, 1.898596, NA, NA, NA, NA, NA, 5.42784~
## $ X1962 <dbl> NA, 7.4535627, NA, 3.8160733, NA, NA, NA, NA, NA, -0.85~
## $ X1963 <dbl> NA, 5.7405204, NA, 7.0408881, NA, NA, NA, NA, NA, -5.30~
## $ X1964 <dbl> NA, 5.473950, NA, 5.233236, NA, NA, NA, NA, NA, 10.1302~
## $ X1965 <dbl> NA, 5.594137, NA, 4.175162, NA, NA, NA, NA, NA, 10.5694~
## $ X1966 <dbl> NA, 4.0587148, NA, -1.7964361, NA, NA, NA, NA, NA, -0.6~
## $ X1967 <dbl> NA, 5.813018, NA, -9.401674, NA, NA, NA, NA, NA, 3.1919~
## $ X1968 <dbl> NA, 4.0466086, NA, 1.4228191, NA, NA, NA, NA, NA, 4.822~
## $ X1969 <dbl> NA, 5.178724, NA, 15.107822, NA, NA, NA, NA, NA, 9.6795~
## $ X1970 <dbl> NA, 4.8559668, NA, 17.6189590, NA, NA, NA, NA, NA, 3.04~
## $ X1971 <dbl> NA, 5.100963, NA, 10.628708, NA, NA, 4.649465, NA, NA, ~
## $ X1972 <dbl> NA, 2.203884, NA, 3.212971, NA, NA, 8.149743, NA, NA, 1~
## $ X1973 <dbl> NA, 4.4587227, NA, 4.1007589, NA, NA, 7.7884672, NA, NA~
## $ X1974 <dbl> NA, 5.8393223, NA, 10.5332804, NA, NA, 5.6187897, NA, N~
## $ X1975 <dbl> NA, 1.4212719, NA, -1.9082403, NA, NA, 0.5422057, NA, N~
## $ X1976 <dbl> NA, 2.4446599, NA, 8.7734448, NA, NA, 3.3037870, 15.743~
## $ X1977 <dbl> NA, 0.7431271, NA, 4.3362697, NA, NA, 2.8385756, 8.2368~
## $ X1978 <dbl> NA, 1.6475414, NA, -2.5552025, NA, NA, 1.4630002, -0.69~
## $ X1979 <dbl> NA, 3.17439019, NA, 5.19298181, NA, NA, 0.04155719, 11.~
## $ X1980 <dbl> NA, 5.7072943, NA, 2.2789134, NA, NA, 2.2087276, 9.0629~
## $ X1981 <dbl> NA, 4.1814809, NA, -6.6352715, -4.4000012, 5.7456353, -~
## $ X1982 <dbl> NA, 0.20442452, NA, -3.09064074, 0.00000000, 2.94859680~
## $ X1983 <dbl> NA, -0.1678771, NA, -6.1737644, 4.2000014, 1.1049383, 1~
## $ X1984 <dbl> NA, 3.59996156, NA, 0.76628601, 6.00000216, -1.25159665~
## $ X1985 <dbl> NA, -0.3106704, NA, 5.4873474, 3.4999995, 1.7806440, 2.~
## $ X1986 <dbl> NA, 1.801654, NA, 1.338868, 2.900002, 5.637243, 3.25332~
## $ X1987 <dbl> 16.0784314, 3.6267585, NA, 1.2051095, 4.0827486, -0.787~
## $ X1988 <dbl> 18.648649, 4.244825, NA, 4.906503, 6.128890, -1.420040,~
## $ X1989 <dbl> 12.12984055, 2.64672228, NA, 2.32247485, 0.04162146, 9.~
## $ X1990 <dbl> 3.96140173, 0.05297704, NA, 6.43720769, -3.45009868, -9~
## $ X1991 <dbl> 7.96287250, -0.08690589, NA, 1.22080653, 0.99135930, -2~
## $ X1992 <dbl> 5.8823529, -2.1554827, NA, 2.6839716, -5.8382807, -7.18~
## $ X1993 <dbl> 7.3076923, -0.6660327, NA, -1.1609721, -23.9834174, 9.5~
## $ X1994 <dbl> 8.2039028, 2.0872612, NA, -0.2260965, 1.3393634, 8.3028~
## $ X1995 <dbl> 2.547144, 4.308948, NA, 2.011852, 15.000000, 13.322333,~
## $ X1996 <dbl> 1.185788, 5.410609, NA, 4.596463, 13.544370, 9.099999, ~
## $ X1997 <dbl> 7.046874, 3.433427, NA, 3.828704, 7.274277, -10.919984,~
## $ X1998 <dbl> 1.9919859, 1.6576824, NA, 3.6067293, 4.6911465, 8.83008~
## $ X1999 <dbl> 1.238042, 2.672356, NA, 1.403042, 2.181490, 12.889897, ~
## $ X2000 <dbl> 7.6165882, 3.4079519, NA, 3.6116575, 3.0546242, 6.95003~
## $ X2001 <dbl> -2.971257, 3.385073, NA, 5.667418, 4.205999, 8.290070, ~
## $ X2002 <dbl> -3.2736464, 4.0774651, NA, 9.9304160, 13.6656865, 4.539~
## $ X2003 <dbl> 1.9755473, 3.1566477, 8.8322778, 5.8730414, 2.9898500, ~
## $ X2004 <dbl> 7.9115635, 5.4234838, 1.4141180, 8.0173116, 10.9528618,~
## $ X2005 <dbl> 1.2143493, 6.3123412, 11.2297148, 6.0054279, 15.0289153~
## $ X2006 <dbl> 1.050608, 6.832111, 5.357403, 5.257805, 11.547683, 5.90~
## $ X2007 <dbl> 1.800226, 7.104249, 13.826320, 5.588151, 14.010018, 5.9~
## $ X2008 <dbl> -0.09070805, 4.79681982, 3.92498382, 6.17543276, 11.166~
## $ X2009 <dbl> -10.5197485, 1.0394013, 21.3905284, 6.1454385, 0.858712~
## $ X2010 <dbl> -3.6850294, 4.8097831, 14.3624415, 6.6419635, 4.4039325~
## $ X2011 <dbl> 3.446054750, 4.201991866, 0.426354785, 5.004874530, 3.4~
## $ X2012 <dbl> -1.3698630, 3.2409757, 12.7522871, 5.2726115, 8.5421876~
## $ X2013 <dbl> 4.19823232, 4.47030560, 5.60074466, 5.83138307, 4.95454~
## $ X2014 <dbl> 0.3000000, 4.0909182, 2.7245434, 5.8335387, 4.8226276, ~
## $ X2015 <dbl> 5.7000009, 2.7632428, 1.4513147, 2.7351660, 0.9435716, ~
## $ X2016 <dbl> 2.099999586, 2.004538890, 2.260314201, -0.001994532, -2~
## $ X2017 <dbl> 1.9999991, 2.8322876, 2.6470032, 2.1637475, -0.1472129,~
## $ X2018 <dbl> NA, 2.385829, 1.189228, 2.831539, -2.003630, 4.071301, ~
## $ X2019 <dbl> NA, 1.6729245, 3.9116034, 3.1474817, -0.6246443, 2.1736~
## $ X2020 <dbl> NA, -3.5751725, -1.9347782, -0.9789222, -4.0405100, -3.~
Reshape the dataset from wide to long format with pivot_longer()
# Reshape from wide to long: every column except the four identifier columns
# is stacked into a Year/GDP pair, giving one row per country and year column.
df_long <- pivot_longer(
  df,
  cols = !c("Country.Name", "Country.Code", "Indicator.Name", "Indicator.Code"),
  names_to = "Year",
  values_to = "GDP"
)
# Preview the first 15 rows of the reshaped data.
head(df_long, n = 15)
## # A tibble: 15 x 6
## Country.Name Country.Code Indicator.Name Indicator.Code Year GDP
## <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1960 NA
## 2 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1961 NA
## 3 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1962 NA
## 4 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1963 NA
## 5 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1964 NA
## 6 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1965 NA
## 7 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1966 NA
## 8 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1967 NA
## 9 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1968 NA
## 10 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1969 NA
## 11 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1970 NA
## 12 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1971 NA
## 13 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1972 NA
## 14 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1973 NA
## 15 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG X1974 NA
Get Year
# Convert the year labels (e.g. "X1960") to numbers. str_extract() returns
# exactly one value per input element (NA when nothing matches), so the result
# always has the same length as the column it replaces.
df_long$Year <- as.numeric(str_extract(df_long$Year, "\\d+"))
Top 5 countries by average annual GDP growth
# Per-country average of GDP over all years (rounded to 2 decimals), together
# with the number of non-missing yearly observations behind each average.
df_sel_val <- df_long %>%
  select(Country.Name, Country.Code, Year, GDP) %>%
  group_by(Country.Name) %>%
  summarise(
    Avg_GDP = round(mean(GDP, na.rm = TRUE), 2),
    Count = sum(!is.na(GDP))
  ) %>%
  # Drop countries whose average is missing, then rank from highest to lowest.
  filter(!is.na(Avg_GDP)) %>%
  arrange(desc(Avg_GDP))
# The five highest averages are the first five rows of the sorted table.
top_5_gdp <- df_sel_val[1:5, ]
top_5_gdp
## # A tibble: 5 x 3
## Country.Name Avg_GDP Count
## <chr> <dbl> <int>
## 1 Equatorial Guinea 13.7 40
## 2 Bosnia and Herzegovina 8.97 26
## 3 Qatar 8.43 20
## 4 Oman 8.38 54
## 5 China 8.07 60
# Bar chart of the five highest average values. The indicator in the data is
# "GDP growth (annual %)", so the axis and title are labelled in percent
# rather than in dollars.
top_5_gdp %>%
  ggplot(aes(reorder(Country.Name, Avg_GDP), Avg_GDP)) +
  geom_col(fill = "#8CD71A") +
  geom_text(aes(label = Avg_GDP), color = "blue") +
  labs(
    x = "Country",
    y = "Average GDP growth (annual %)",
    title = "Equatorial Guinea has the highest average annual GDP growth of 13.67%,\nfollowed by Bosnia and Herzegovina with 8.97%"
  )
Bottom 5 countries by average annual GDP growth
# Take the last five rows of the per-country averages table.
least_5_gdp <- df_sel_val %>%
  tail(n = 5)
least_5_gdp
## # A tibble: 5 x 3
## Country.Name Avg_GDP Count
## <chr> <dbl> <int>
## 1 American Samoa -0.97 17
## 2 Ukraine -1.04 33
## 3 Virgin Islands (U.S.) -1.15 16
## 4 Northern Mariana Islands -1.8 17
## 5 South Sudan -4.93 7
# Bar chart of the five lowest average values. The indicator in the data is
# "GDP growth (annual %)", so the axis and title are labelled in percent
# rather than in dollars.
least_5_gdp %>%
  ggplot(aes(reorder(Country.Name, Avg_GDP), Avg_GDP)) +
  geom_col(fill = "#D77E1A") +
  geom_text(aes(label = Avg_GDP), color = "blue") +
  labs(
    x = "Country",
    y = "Average GDP growth (annual %)",
    title = "South Sudan has the lowest average annual GDP growth, at approximately\n-4.9%. Northern Mariana Islands is the second lowest among the\nfive lowest averages"
  )
World Average GDP Growth by Year
## # A tibble: 60 x 3
## Year Avg_GDP CounOfGDP
## <dbl> <dbl> <int>
## 1 1970 7.3 141
## 2 1969 6.79 144
## 3 1976 6.26 160
## 4 1964 6.22 128
## 5 1968 6.14 141
## 6 2004 5.97 251
## 7 2006 5.83 252
## 8 2007 5.76 253
## 9 1972 5.47 154
## 10 1974 5.44 154
## # ... with 50 more rows
# Scatter plot of the Avg_GDP column against the Year column.
# NOTE(review): df_sel_val2 is not created in the code shown here - confirm it
# is defined before this point.
plot(df_sel_val2$Year,df_sel_val2$Avg_GDP)
# Correlation between Year and Avg_GDP (cor() defaults to Pearson)
cor(df_sel_val2$Year,df_sel_val2$Avg_GDP)
## [1] -0.4327291
# Fit a linear regression of Avg_GDP on Year and keep the fitted model
result <- lm(Avg_GDP~Year,data=df_sel_val2)
# Overlay the fitted regression line on the current plot
abline(result)
# Print the model summary (residuals, coefficient table, R-squared, F-statistic)
summary(result)
##
## Call:
## lm(formula = Avg_GDP ~ Year, data = df_sel_val2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.2053 -0.6952 0.1485 0.8327 2.7638
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 93.48694 24.53146 3.811 0.000337 ***
## Year -0.04505 0.01232 -3.656 0.000554 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.653 on 58 degrees of freedom
## Multiple R-squared: 0.1873, Adjusted R-squared: 0.1732
## F-statistic: 13.36 on 1 and 58 DF, p-value: 0.0005541
# Print the model itself to show only the call and the intercept/slope estimates
result
##
## Call:
## lm(formula = Avg_GDP ~ Year, data = df_sel_val2)
##
## Coefficients:
## (Intercept) Year
## 93.48694 -0.04505
# To list the components stored in the model object, uncomment:
#names(result)
# To view the fitted values, uncomment:
#result$fitted
# Plot Year against the model's fitted values
# (`$fitted` partially matches the lm component `fitted.values`)
plot(df_sel_val2$Year,result$fitted)
Create function to predict global GDP
#' Predict the average GDP value for one or more years
#'
#' Evaluates the fitted straight line `intercept + slope * year`.
#'
#' @param year Numeric vector of years; may be empty.
#' @param model Fitted model whose first coefficient is the intercept and
#'   whose second coefficient is the slope for the year. Defaults to the
#'   global `result` object.
#' @return Unnamed numeric vector of predictions, the same length as `year`.
Global_GDP <- function(year, model = result) {
  # coef() avoids relying on `$coef` partially matching `$coefficients`;
  # unname() keeps the coefficient labels off the returned values.
  coefs <- unname(coef(model))
  # Vectorised arithmetic handles any input length, including zero, whereas a
  # `1:length(year)` loop iterates over c(1, 0) for empty input and yields NA.
  coefs[1] + coefs[2] * year
}
Make Prediction
# Use the hand-written helper to predict the years 1970, 1969 and 2020
Global_GDP(year = c(1970, 1969, 2020))
## [1] 4.737864 4.782915 2.485350
# Predict the same three years with R's built-in predict() method
predict(result, newdata = data.frame(Year = c(1970, 1969, 2020)))
## 1 2 3
## 4.737864 4.782915 2.485350
# Add the model's prediction for every year present in the data
df_sel_val2$Predicted <- Global_GDP(df_sel_val2$Year)
# Absolute gap between the actual and predicted values, as a percentage of the
# predicted value. Columns are referenced directly (not via df_sel_val2$...)
# so mutate() works on the rows it is given, and abs() in the denominator
# keeps the percentage non-negative even when a prediction is negative.
df_sel_pred <- df_sel_val2 %>%
  mutate(PercentDifference = abs(Avg_GDP - Predicted) / abs(Predicted) * 100)
head(df_sel_pred)
## # A tibble: 6 x 5
## Year Avg_GDP CounOfGDP Predicted PercentDifference
## <dbl> <dbl> <int> <dbl> <dbl>
## 1 1970 7.3 141 4.74 54.1
## 2 1969 6.79 144 4.78 42.0
## 3 1976 6.26 160 4.47 40.1
## 4 1964 6.22 128 5.01 24.2
## 5 1968 6.14 141 4.83 27.2
## 6 2004 5.97 251 3.21 86.2
Compare the actual and predicted average GDP growth for the year 2020
# Pull out the 2020 row to see its actual and predicted averages side by side
pred_2020 <- filter(df_sel_pred, Year == 2020)
pred_2020
## # A tibble: 1 x 5
## Year Avg_GDP CounOfGDP Predicted PercentDifference
## <dbl> <dbl> <int> <dbl> <dbl>
## 1 2020 -4.72 226 2.49 290.