About the project
In this R project, we will use 2 different datasets from Eurostat to evaluate the growth of E-Commerce in Europe.
Import data into R studio
First, we import our first dataset from an .xlsx file (this file contains data from 2010 to 2019).
library (tidyverse)
library (readxl)
# Import the first Eurostat extract. Every one of the ten columns gets an
# explicit type: seven text columns, then two numeric columns (TIME_PERIOD and
# OBS_VALUE), then one more text column (OBS_FLAG).
Ecommerce_2019 <- read_excel(
  path = "C:/Users/vasil/OneDrive/Desktop/Data Analysis/R/Ecommerce/isoc_ec_ibuy_page_linear.xlsx",
  col_types = c(rep("text", 7), rep("numeric", 2), "text")
)

# Preview the first rows of the imported table.
head(Ecommerce_2019)
# A tibble: 6 × 10
DATAFLOW `LAST UPDATE` freq indic_is ind_type unit geo TIME_PERIOD
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
1 ESTAT:ISOC_EC_I… 19/09/23 11:… A I_BLT12 IND_TOT… PC_I… AL 2018
2 ESTAT:ISOC_EC_I… 19/09/23 11:… A I_BLT12 IND_TOT… PC_I… AL 2019
3 ESTAT:ISOC_EC_I… 19/09/23 11:… A I_BLT12 IND_TOT… PC_I… AT 2010
4 ESTAT:ISOC_EC_I… 19/09/23 11:… A I_BLT12 IND_TOT… PC_I… AT 2011
5 ESTAT:ISOC_EC_I… 19/09/23 11:… A I_BLT12 IND_TOT… PC_I… AT 2012
6 ESTAT:ISOC_EC_I… 19/09/23 11:… A I_BLT12 IND_TOT… PC_I… AT 2013
# ℹ 2 more variables: OBS_VALUE <dbl>, OBS_FLAG <chr>
[1] "DATAFLOW" "LAST UPDATE" "freq" "indic_is" "ind_type"
[6] "unit" "geo" "TIME_PERIOD" "OBS_VALUE" "OBS_FLAG"
Manipulate data
When we look at the dataset, we see that the headers are confusing and hard to understand. So first, let’s rename the headers so that we can communicate our dataset and findings more effectively.
# Give the Eurostat dimension columns descriptive names. The remaining columns
# (DATAFLOW, `LAST UPDATE`, TIME_PERIOD, OBS_VALUE, OBS_FLAG) keep their names.
# rename() already keeps every column, so no select(everything()) is needed.
Ecommerce_2019_fixed <- Ecommerce_2019 %>%
  rename(
    Time_frequency = freq,
    Last_online_purchase_in = indic_is,
    Age_group = ind_type,
    People_who_used_internet_within = unit,
    Country_Group = geo
  )

# Preview the renamed table.
head(Ecommerce_2019_fixed)
# A tibble: 6 × 10
DATAFLOW `LAST UPDATE` Time_frequency Last_online_purchase…¹ Age_group
<chr> <chr> <chr> <chr> <chr>
1 ESTAT:ISOC_EC_I… 19/09/23 11:… A I_BLT12 IND_TOTAL
2 ESTAT:ISOC_EC_I… 19/09/23 11:… A I_BLT12 IND_TOTAL
3 ESTAT:ISOC_EC_I… 19/09/23 11:… A I_BLT12 IND_TOTAL
4 ESTAT:ISOC_EC_I… 19/09/23 11:… A I_BLT12 IND_TOTAL
5 ESTAT:ISOC_EC_I… 19/09/23 11:… A I_BLT12 IND_TOTAL
6 ESTAT:ISOC_EC_I… 19/09/23 11:… A I_BLT12 IND_TOTAL
# ℹ abbreviated name: ¹Last_online_purchase_in
# ℹ 5 more variables: People_who_used_internet_within <chr>,
# Country_Group <chr>, TIME_PERIOD <dbl>, OBS_VALUE <dbl>, OBS_FLAG <chr>
# List the column names to confirm that the renaming took effect.
names (Ecommerce_2019_fixed)
[1] "DATAFLOW" "LAST UPDATE"
[3] "Time_frequency" "Last_online_purchase_in"
[5] "Age_group" "People_who_used_internet_within"
[7] "Country_Group" "TIME_PERIOD"
[9] "OBS_VALUE" "OBS_FLAG"
Looking at our dataset again, we see that, besides the headers, many of the values are also hard to understand, so let’s fix those as well.
# Replace Eurostat codes with readable labels.
#
# `labels` is a named character vector (names = codes, values = labels).
# Every element of `values` that equals one of the codes is replaced by the
# corresponding label; all other elements (including NA) are left unchanged.
relabel_codes <- function(values, labels) {
  position <- match(values, names(labels))
  matched <- !is.na(position)
  values[matched] <- unname(labels[position[matched]])
  values
}

# Country / country-group codes and the label each one is replaced with.
country_group_labels <- c(
  AL = "Albania",
  AT = "Austria",
  BA = "Bosnia_and_Herzegovina",
  BE = "Belgium",
  BG = "Bulgaria",
  CH = "Switzerland",
  CY = "Cyprus",
  CZ = "Czechia",
  DE = "Germany",
  DK = "Denmark",
  EA = "Euro_Area(EA11_1999,EA12_2001,EA13_2007,EA15_2008,EA16_2009,EA17_2011,EA18_2014,EA19_2015,EA20_2023)",
  EE = "Estonia",
  EL = "Greece",
  ES = "Spain",
  EU15 = "European_Union_15_countries(1995-2004)",
  EU25 = "European_Union_25_countries(2004-2006)",
  EU27_2007 = "European_Union_27_countries(2007-2013)",
  EU27_2020 = "European_Union_27_countries(from_2020)",
  EU28 = "European_Union_28_countries(2013-2020)",
  FI = "Finland",
  FR = "France",
  HR = "Croatia",
  HU = "Hungary",
  IE = "Ireland",
  IS = "Iceland",
  IT = "Italy",
  LT = "Lithuania",
  LU = "Luxembourg",
  LV = "Latvia",
  ME = "Montenegro",
  MK = "North_Macedonia",
  MT = "Malta",
  NL = "Netherlands",
  NO = "Norway",
  PL = "Poland",
  PT = "Portugal",
  RO = "Romania",
  RS = "Serbia",
  SE = "Sweden",
  SI = "Slovenia",
  SK = "Slovakia",
  TR = "Türkiye",
  UK = "United_Kingdom",
  XK = "Kosovo(Under_United_Nations_Security_Council_Resolution_1244/99)"
)

# Apply the lookups column by column.
Ecommerce_2019_fixed <- Ecommerce_2019_fixed %>%
  mutate(
    Time_frequency = relabel_codes(Time_frequency, c(A = "Annual")),
    Last_online_purchase_in = relabel_codes(
      Last_online_purchase_in,
      c(I_BLT12 = "12_months")
    ),
    Age_group = relabel_codes(Age_group, c(IND_TOTAL = "TOTAL")),
    People_who_used_internet_within = relabel_codes(
      People_who_used_internet_within,
      c(PC_IND_ILT12 = "Last_year")
    ),
    Country_Group = relabel_codes(Country_Group, country_group_labels)
  )

# Show the structure of the recoded table, then print all ten columns.
Ecommerce_2019_fixed %>%
  glimpse() %>%
  select(1:10)
Rows: 383
Columns: 10
$ DATAFLOW <chr> "ESTAT:ISOC_EC_IBUY(1.0)", "ESTAT:ISOC…
$ `LAST UPDATE` <chr> "19/09/23 11:00:00", "19/09/23 11:00:0…
$ Time_frequency <chr> "Annual", "Annual", "Annual", "Annual"…
$ Last_online_purchase_in <chr> "12_months", "12_months", "12_months",…
$ Age_group <chr> "TOTAL", "TOTAL", "TOTAL", "TOTAL", "T…
$ People_who_used_internet_within <chr> "Last_year", "Last_year", "Last_year",…
$ Country_Group <chr> "Albania", "Albania", "Austria", "Aust…
$ TIME_PERIOD <dbl> 2018, 2019, 2010, 2011, 2012, 2013, 20…
$ OBS_VALUE <dbl> 8.19, 10.13, 56.50, 55.72, 59.78, 66.1…
$ OBS_FLAG <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
# A tibble: 383 × 10
DATAFLOW `LAST UPDATE` Time_frequency Last_online_purchase…¹ Age_group
<chr> <chr> <chr> <chr> <chr>
1 ESTAT:ISOC_EC_… 19/09/23 11:… Annual 12_months TOTAL
2 ESTAT:ISOC_EC_… 19/09/23 11:… Annual 12_months TOTAL
3 ESTAT:ISOC_EC_… 19/09/23 11:… Annual 12_months TOTAL
4 ESTAT:ISOC_EC_… 19/09/23 11:… Annual 12_months TOTAL
5 ESTAT:ISOC_EC_… 19/09/23 11:… Annual 12_months TOTAL
6 ESTAT:ISOC_EC_… 19/09/23 11:… Annual 12_months TOTAL
7 ESTAT:ISOC_EC_… 19/09/23 11:… Annual 12_months TOTAL
8 ESTAT:ISOC_EC_… 19/09/23 11:… Annual 12_months TOTAL
9 ESTAT:ISOC_EC_… 19/09/23 11:… Annual 12_months TOTAL
10 ESTAT:ISOC_EC_… 19/09/23 11:… Annual 12_months TOTAL
# ℹ 373 more rows
# ℹ abbreviated name: ¹Last_online_purchase_in
# ℹ 5 more variables: People_who_used_internet_within <chr>,
# Country_Group <chr>, TIME_PERIOD <dbl>, OBS_VALUE <dbl>, OBS_FLAG <chr>
Data visualization
Now that we have made our dataset easier to read, let’s check the growth of E-Commerce for “European_Union_27_countries(from_2020)”.
We are going to use a line graph with point markers for the visual presentation.
# Line chart with point markers of OBS_VALUE per year for the
# "European_Union_27_countries(from_2020)" aggregate.
Ecommerce_2019_fixed %>%
  filter(Country_Group == "European_Union_27_countries(from_2020)") %>%
  ggplot(aes(TIME_PERIOD, OBS_VALUE)) +
  geom_point(size = 5, colour = "blue", alpha = .3) +
  # Line thickness is set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0.
  geom_line(colour = "blue", linewidth = 1) +
  # One x-axis break per year, spanning every year present in the dataset.
  scale_x_continuous(
    breaks = round(seq(
      min(Ecommerce_2019_fixed$TIME_PERIOD),
      max(Ecommerce_2019_fixed$TIME_PERIOD),
      by = 1
    ))
  ) +
  labs(
    title = "E-Commerce growth in the EU(27 countries from 2020)",
    x = "Year",
    y = "Percentage of internet users who made a purchase online",
    caption = "Source: Eurostat(Online data code: isoc_ec_ibuy)"
  ) +
  theme_minimal() +
  # Centre and emphasise the title; tone down the axis text.
  theme(
    plot.title = element_text(
      size = 18,
      face = "bold",
      colour = "steelblue",
      hjust = 0.5
    ),
    axis.text = element_text(size = 10, colour = "grey", face = "bold"),
    axis.title = element_text(size = 12, colour = "steelblue")
  )
We can also look at any specific country to see what its data tells us about the growth of E-Commerce popularity there.
Let’s take for example Greece.
# Line chart with point markers of OBS_VALUE per year for Greece.
Ecommerce_2019_fixed %>%
  filter(Country_Group == "Greece") %>%
  ggplot(aes(TIME_PERIOD, OBS_VALUE)) +
  geom_point(size = 5, colour = "blue", alpha = .3) +
  # Line thickness is set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0.
  geom_line(colour = "blue", linewidth = 1) +
  # One x-axis break per year, spanning every year present in the dataset.
  scale_x_continuous(
    breaks = round(seq(
      min(Ecommerce_2019_fixed$TIME_PERIOD),
      max(Ecommerce_2019_fixed$TIME_PERIOD),
      by = 1
    ))
  ) +
  labs(
    title = "E-Commerce growth in Greece",
    x = "Year",
    y = "Percentage of internet users who made a purchase online",
    caption = "Source: Eurostat(online data code: isoc_ec_ibuy)"
  ) +
  theme_bw() +
  # Centre and emphasise the title; tone down the axis text.
  theme(
    plot.title = element_text(
      size = 18,
      face = "bold",
      colour = "steelblue",
      hjust = 0.5
    ),
    axis.text = element_text(size = 10, colour = "grey", face = "bold"),
    axis.title = element_text(size = 12, colour = "steelblue")
  )
2nd dataset
As insightful as this dataset is, unfortunately, it only contains data up to 2019.
So we have to find a new dataset for the latest years as well.
library (readxl)
# Import the second Eurostat extract. As with the first file, all ten columns
# get an explicit type: seven text columns, two numeric columns (TIME_PERIOD
# and OBS_VALUE), then one more text column (OBS_FLAG).
Ecommerce_2022 <- read_excel(
  path = "C:/Users/vasil/OneDrive/Desktop/Data Analysis/R/Ecommerce/isoc_ec_ib20_page_linear.xlsx",
  col_types = c(rep("text", 7), rep("numeric", 2), "text")
)

# Preview the first rows of the imported table.
head(Ecommerce_2022)
# A tibble: 6 × 10
DATAFLOW `LAST UPDATE` freq ind_type indic_is unit geo TIME_PERIOD
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
1 ESTAT:ISOC_EC_I… 19/09/23 11:… A IND_TOT… I_BLT12 PC_I… AL 2020
2 ESTAT:ISOC_EC_I… 19/09/23 11:… A IND_TOT… I_BLT12 PC_I… AL 2021
3 ESTAT:ISOC_EC_I… 19/09/23 11:… A IND_TOT… I_BLT12 PC_I… AL 2022
4 ESTAT:ISOC_EC_I… 19/09/23 11:… A IND_TOT… I_BLT12 PC_I… AT 2020
5 ESTAT:ISOC_EC_I… 19/09/23 11:… A IND_TOT… I_BLT12 PC_I… AT 2021
6 ESTAT:ISOC_EC_I… 19/09/23 11:… A IND_TOT… I_BLT12 PC_I… AT 2022
# ℹ 2 more variables: OBS_VALUE <dbl>, OBS_FLAG <chr>
This new dataset comes from the same source.
So we are going to have to repeat the previous steps to make our data easier to understand.
Fixing names of specific values
# Create the cleaned copy of the second dataset. The recoding below needs
# `Ecommerce_2022_fixed` to exist, so the columns are renamed here first, using
# the same descriptive names as for the first dataset.
Ecommerce_2022_fixed <- Ecommerce_2022 %>%
  rename(
    Time_frequency = freq,
    Age_group = ind_type,
    Last_online_purchase_in = indic_is,
    People_who_used_internet_within = unit,
    Country_Group = geo
  )

# Replace Eurostat codes with readable labels.
#
# `labels` is a named character vector (names = codes, values = labels).
# Every element of `values` that equals one of the codes is replaced by the
# corresponding label; all other elements (including NA) are left unchanged.
relabel_codes <- function(values, labels) {
  position <- match(values, names(labels))
  matched <- !is.na(position)
  values[matched] <- unname(labels[position[matched]])
  values
}

# Country / country-group codes and the label each one is replaced with.
# The list matches the one used for the first dataset so that both tables end
# up with identical labels; codes that do not occur in the data are never
# matched and therefore have no effect.
country_group_labels <- c(
  AL = "Albania",
  AT = "Austria",
  BA = "Bosnia_and_Herzegovina",
  BE = "Belgium",
  BG = "Bulgaria",
  CH = "Switzerland",
  CY = "Cyprus",
  CZ = "Czechia",
  DE = "Germany",
  DK = "Denmark",
  EA = "Euro_Area(EA11_1999,EA12_2001,EA13_2007,EA15_2008,EA16_2009,EA17_2011,EA18_2014,EA19_2015,EA20_2023)",
  EE = "Estonia",
  EL = "Greece",
  ES = "Spain",
  EU15 = "European_Union_15_countries(1995-2004)",
  EU25 = "European_Union_25_countries(2004-2006)",
  EU27_2007 = "European_Union_27_countries(2007-2013)",
  EU27_2020 = "European_Union_27_countries(from_2020)",
  EU28 = "European_Union_28_countries(2013-2020)",
  FI = "Finland",
  FR = "France",
  HR = "Croatia",
  HU = "Hungary",
  IE = "Ireland",
  IS = "Iceland",
  IT = "Italy",
  LT = "Lithuania",
  LU = "Luxembourg",
  LV = "Latvia",
  ME = "Montenegro",
  MK = "North_Macedonia",
  MT = "Malta",
  NL = "Netherlands",
  NO = "Norway",
  PL = "Poland",
  PT = "Portugal",
  RO = "Romania",
  RS = "Serbia",
  SE = "Sweden",
  SI = "Slovenia",
  SK = "Slovakia",
  TR = "Türkiye",
  UK = "United_Kingdom",
  XK = "Kosovo(Under_United_Nations_Security_Council_Resolution_1244/99)"
)

# Apply the lookups column by column.
Ecommerce_2022_fixed <- Ecommerce_2022_fixed %>%
  mutate(
    Time_frequency = relabel_codes(Time_frequency, c(A = "Annual")),
    Age_group = relabel_codes(Age_group, c(IND_TOTAL = "TOTAL")),
    Last_online_purchase_in = relabel_codes(
      Last_online_purchase_in,
      c(I_BLT12 = "12_months")
    ),
    People_who_used_internet_within = relabel_codes(
      People_who_used_internet_within,
      c(PC_IND_ILT12 = "Last_year")
    ),
    Country_Group = relabel_codes(Country_Group, country_group_labels)
  )

# Show the structure of the recoded table, then print all ten columns.
Ecommerce_2022_fixed %>%
  glimpse() %>%
  select(1:10)
Rows: 110
Columns: 10
$ DATAFLOW <chr> "ESTAT:ISOC_EC_IB20(1.0)", "ESTAT:ISOC…
$ `LAST UPDATE` <chr> "19/09/23 11:00:00", "19/09/23 11:00:0…
$ Time_frequency <chr> "Annual", "Annual", "Annual", "Annual"…
$ Age_group <chr> "TOTAL", "TOTAL", "TOTAL", "TOTAL", "T…
$ Last_online_purchase_in <chr> "12_months", "12_months", "12_months",…
$ People_who_used_internet_within <chr> "Last_year", "Last_year", "Last_year",…
$ Country_Group <chr> "Albania", "Albania", "Albania", "Aust…
$ TIME_PERIOD <dbl> 2020, 2021, 2022, 2020, 2021, 2022, 20…
$ OBS_VALUE <dbl> 19.52, 21.44, 34.67, 74.48, 67.98, 69.…
$ OBS_FLAG <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
# A tibble: 110 × 10
DATAFLOW `LAST UPDATE` Time_frequency Age_group Last_online_purchase…¹
<chr> <chr> <chr> <chr> <chr>
1 ESTAT:ISOC_EC_… 19/09/23 11:… Annual TOTAL 12_months
2 ESTAT:ISOC_EC_… 19/09/23 11:… Annual TOTAL 12_months
3 ESTAT:ISOC_EC_… 19/09/23 11:… Annual TOTAL 12_months
4 ESTAT:ISOC_EC_… 19/09/23 11:… Annual TOTAL 12_months
5 ESTAT:ISOC_EC_… 19/09/23 11:… Annual TOTAL 12_months
6 ESTAT:ISOC_EC_… 19/09/23 11:… Annual TOTAL 12_months
7 ESTAT:ISOC_EC_… 19/09/23 11:… Annual TOTAL 12_months
8 ESTAT:ISOC_EC_… 19/09/23 11:… Annual TOTAL 12_months
9 ESTAT:ISOC_EC_… 19/09/23 11:… Annual TOTAL 12_months
10 ESTAT:ISOC_EC_… 19/09/23 11:… Annual TOTAL 12_months
# ℹ 100 more rows
# ℹ abbreviated name: ¹Last_online_purchase_in
# ℹ 5 more variables: People_who_used_internet_within <chr>,
# Country_Group <chr>, TIME_PERIOD <dbl>, OBS_VALUE <dbl>, OBS_FLAG <chr>
Merging the 2 datasets
Now, before we start exploring our new dataset, let’s first merge it with the first one, so we can get a more complete picture.
# Stack the two cleaned tables into one covering both periods. The tables list
# Age_group and Last_online_purchase_in in a different order, so bind_rows()
# is used: it aligns columns by name rather than by position.
Ecommerce_2010_2022_merged <- bind_rows(
  Ecommerce_2019_fixed,
  Ecommerce_2022_fixed
)
Data visualization of the merged datasets
Now, let’s take a look again at the growth of E-Commerce in “European_Union_27_countries(from_2020)”.
# Line chart with point markers of OBS_VALUE per year for the
# "European_Union_27_countries(from_2020)" aggregate, using the merged data.
Ecommerce_2010_2022_merged %>%
  filter(Country_Group == "European_Union_27_countries(from_2020)") %>%
  ggplot(aes(TIME_PERIOD, OBS_VALUE)) +
  geom_point(size = 5, colour = "blue", alpha = .3) +
  # Line thickness is set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0.
  geom_line(colour = "blue", linewidth = 1) +
  # One x-axis break per year, spanning every year present in the merged data.
  scale_x_continuous(
    breaks = round(seq(
      min(Ecommerce_2010_2022_merged$TIME_PERIOD),
      max(Ecommerce_2010_2022_merged$TIME_PERIOD),
      by = 1
    ))
  ) +
  labs(
    title = "E-Commerce growth in the EU(27 countries from 2020)",
    x = "Year",
    y = "Percentage of internet users who made a purchase online",
    caption = "Source: Eurostat(online data codes: isoc_ec_ibuy and isoc_ec_ib20)"
  ) +
  theme_minimal() +
  # Centre and emphasise the title; tone down the axis text.
  theme(
    plot.title = element_text(
      size = 18,
      face = "bold",
      colour = "steelblue",
      hjust = 0.5
    ),
    axis.text = element_text(size = 10, colour = "grey", face = "bold"),
    axis.title = element_text(size = 12, colour = "steelblue")
  )
And for Greece:
# Line chart with point markers of OBS_VALUE per year for Greece, using the
# merged data.
Ecommerce_2010_2022_merged %>%
  filter(Country_Group == "Greece") %>%
  ggplot(aes(TIME_PERIOD, OBS_VALUE)) +
  geom_point(size = 5, colour = "blue", alpha = .3) +
  # Line thickness is set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0.
  geom_line(colour = "blue", linewidth = 1) +
  # One x-axis break per year, spanning every year present in the merged data.
  scale_x_continuous(
    breaks = round(seq(
      min(Ecommerce_2010_2022_merged$TIME_PERIOD),
      max(Ecommerce_2010_2022_merged$TIME_PERIOD),
      by = 1
    ))
  ) +
  labs(
    title = "E-Commerce growth in Greece",
    x = "Year",
    y = "Percentage of internet users who made a purchase online",
    caption = "Source: Eurostat(online data codes: isoc_ec_ibuy and isoc_ec_ib20)"
  ) +
  theme_minimal() +
  # Centre and emphasise the title; tone down the axis text.
  theme(
    plot.title = element_text(
      size = 18,
      face = "bold",
      colour = "steelblue",
      hjust = 0.5
    ),
    axis.text = element_text(size = 10, colour = "grey", face = "bold"),
    axis.title = element_text(size = 12, colour = "steelblue")
  )
Conclusions:
As we can see from our data, the E-Commerce sector has been growing at a significant rate over the last 12 years, both in the EU and in our example country, Greece, making it a very interesting and appealing investment.