Ease of Doing Business

Introduction

This exercise uses data from the World Bank on the “ease of doing business” metric, reported by country and region. To guide the data exploration, I would like to know which countries, grouped by continent, have the greatest ease of doing business.

Step 1: Importing and cleaning data

# set your working directory

# import the data in two passes, because the file does not start with its
# header row: the column names are read as a single row after skipping 3 lines,
# and the observations are read after skipping 5 lines
headers <- read.csv(
  "worldbank.csv",
  skip = 3, header = FALSE, nrows = 1, as.is = TRUE
)
data <- read.csv("worldbank.csv", skip = 5, header = FALSE)

# apply the names read from the file to the observation columns
colnames(data) <- headers

# preview the first five rows of the imported data
head(data, n = 5L)

##                  Country Name Country Code
## 1                       Aruba          ABW
## 2 Africa Eastern and Southern          AFE
## 3                 Afghanistan          AFG
## 4  Africa Western and Central          AFW
## 5                      Angola          AGO
##                                                        Indicator Name
## 1 Ease of doing business index (1=most business-friendly regulations)
## 2 Ease of doing business index (1=most business-friendly regulations)
## 3 Ease of doing business index (1=most business-friendly regulations)
## 4 Ease of doing business index (1=most business-friendly regulations)
## 5 Ease of doing business index (1=most business-friendly regulations)
##   Indicator Code 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971
## 1 IC.BUS.EASE.XQ   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 2 IC.BUS.EASE.XQ   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 3 IC.BUS.EASE.XQ   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 4 IC.BUS.EASE.XQ   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 5 IC.BUS.EASE.XQ   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
##   1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986
## 1   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 2   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 3   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 4   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 5   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
##   1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001
## 1   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 2   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 3   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 4   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 5   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
##   2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
## 1   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 2   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 3   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 4   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 5   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
##   2017 2018 2019 2020 NA
## 1   NA   NA   NA   NA NA
## 2   NA   NA   NA   NA NA
## 3   NA   NA  173   NA NA
## 4   NA   NA   NA   NA NA
## 5   NA   NA  177   NA NA

# list the column names of the imported data
colnames(data)

##  [1] "Country Name"   "Country Code"   "Indicator Name" "Indicator Code"
##  [5] "1960"           "1961"           "1962"           "1963"          
##  [9] "1964"           "1965"           "1966"           "1967"          
## [13] "1968"           "1969"           "1970"           "1971"          
## [17] "1972"           "1973"           "1974"           "1975"          
## [21] "1976"           "1977"           "1978"           "1979"          
## [25] "1980"           "1981"           "1982"           "1983"          
## [29] "1984"           "1985"           "1986"           "1987"          
## [33] "1988"           "1989"           "1990"           "1991"          
## [37] "1992"           "1993"           "1994"           "1995"          
## [41] "1996"           "1997"           "1998"           "1999"          
## [45] "2000"           "2001"           "2002"           "2003"          
## [49] "2004"           "2005"           "2006"           "2007"          
## [53] "2008"           "2009"           "2010"           "2011"          
## [57] "2012"           "2013"           "2014"           "2015"          
## [61] "2016"           "2017"           "2018"           "2019"          
## [65] "2020"           "NA"

# report how many rows and columns were imported
c(nrow(data), ncol(data))

## [1] 266  66

# keep only the country name and the 2019 figure, under shorter column names
data19 <- setNames(data[c("Country Name", "2019")], c("country", "metric"))

# drop every row that contains a missing value
data19 <- na.omit(data19)

# preview the cleaned data
head(data19, n = 5L)

##                 country metric
## 3           Afghanistan    173
## 5                Angola    177
## 6               Albania     82
## 9  United Arab Emirates     16
## 10            Argentina    126

# report how many rows and columns remain after cleaning
c(nrow(data19), ncol(data19))

## [1] 189   2

Step 2: Analysing data

# add a continent column, looked up from each country's name
library(countrycode)
data19$continent <- countrycode(
  sourcevar = data19$country,
  origin = "country.name",
  destination = "continent"
)

## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: Kosovo

# count the missing cells left in the data after the continent lookup
length(which(is.na(data19)))

## [1] 1

# drop the rows that still contain a missing value, then preview the result
data19 <- na.omit(data19)
head(data19, n = 8L)

##                 country metric continent
## 3           Afghanistan    173      Asia
## 5                Angola    177    Africa
## 6               Albania     82    Europe
## 9  United Arab Emirates     16      Asia
## 10            Argentina    126  Americas
## 11              Armenia     47      Asia
## 13  Antigua and Barbuda    113  Americas
## 14            Australia     14   Oceania

# per continent: mean metric rounded to a whole number, and number of countries
library(dplyr)
group_data19 <- summarise(
  group_by(data19, continent),
  mean = round(mean(metric)),
  n = n()
)
group_data19

## # A tibble: 5 × 3
##   continent  mean     n
##   <chr>     <dbl> <int>
## 1 Africa      137    54
## 2 Americas    109    35
## 3 Asia         83    47
## 4 Europe       42    40
## 5 Oceania     108    12

# bar chart of the per-continent mean computed above
library(ggplot2)
ggplot(data = group_data19, mapping = aes(x = continent, y = mean)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(
    title = "Bar Chart",
    subtitle = "Mean of Ease of Doing Business Grouped by Continent",
    caption = "Source: World Bank",
    x = "Continent",
    y = "Mean, Ease of Doing Business"
  )

# histogram of the 2019 metric across countries; the x-axis shows the index
# value (1 = most business-friendly regulations), not individual countries,
# so it is labelled with the index rather than "Country"
hist(
  data19$metric,
  breaks = 10,
  ylim = c(0, 25),
  main = "Ease of Doing Business by Country",
  col = "#4cbea3",
  xlab = "Ease of doing business index (1 = most business-friendly)"
)

Ease of Doing Business

Ethel Ngiam

9/7/2021

Introduction

Step 1: Importing and cleaning data

Step 2: Analysing data

Conclusion