About file

This is the Rmd. script with reproducible code used to perform an analysis which was the base for the report In the search of music’s golden age. The code consists of all necessary data transformations and graphs presented in the report as well as some additional graphs enabling a slightly more expanded insight into the nature of data. The database is retrieved from RIAA site and presents recorded music sales and revenues divided by format within the time range of 1973-2019. The database is available here.

Libraries used

library(tidyverse)
library(car)
library(scales)
library(ggalluvial)
library(RColorBrewer)
library(data.table)
library(ggiraph)
library(ggridges)
library(viridis)
library(ggpubr)
library(ggforce)
library(gganimate)
library(forcats)
library(magick)

Initial preparation of the sales part

We began with importing the sales part of the database, consisting of the number of sales of music recordings divided by formats..

sales <- read.csv2('data/Sales.csv', dec =',', 
                   encoding =  'UTF-8')

The dataset had to be cleaned. Useful viariables were selected, renamed and transformed into more convenient form.

sales1 <- sales %>% dplyr::select(Year, Format, Value..Actual., Total.Value.For.Year) %>% 
  rename(Sales = Value..Actual., TotalYearSales = Total.Value.For.Year) %>% 
  separate(TotalYearSales, into = c("tr1", "SalesForYear", "tr2"), 
           sep = c(2,-1), remove = FALSE) %>% 
  select(Year, Format, Sales, SalesForYear) %>% 
  mutate(SalesForYear = as.numeric(gsub("," ,".", SalesForYear)))

sales1[sales1$Year == 2013 & sales1$Format == "DVD Audio",]$Sales
## [1] -0.05473517

The number of sales for one observation of the DVD Audio format for a year 2013 was negative and made no sense. We replaced this value with the mean of DVD Audio sales for 2012 and 2014 to obtain the likely value for this year and to keep the continuous character of the records. Next we ordered the data according to the time of occurence and created the first plot showing the music recordings sold in US between 1973 and 2019 divided by music format.

# missing proper data for DVD Audio in 2013
sales1[sales1$Year == 2013 & sales1$Format == "DVD Audio",]$Sales <- mean(c(sales1[sales1$Year %in% c(2012,2014) & sales1$Format == "DVD Audio",]$Sales))

#ordering Format category by the time of occurence
sall <- sales1 %>% arrange(Year)
orderedFormat <- sall[!duplicated(sall$Format), 'Format']
sales1 <- sales1 %>%  mutate(Format= fct_relevel(Format, orderedFormat))

Plot 1: Recorded music sold in US between 1973-2019 divided by format

ggplot(data = sales1, aes(x = Year, y = Sales, fill = Format)) + 
  geom_col(position = "stack", width=1)+
  labs(y = 'Number of records sold', x = 'Year',
       title = 'Changes in the formats of recorded music',
       caption = "Data source: RIAA https://www.riaa.com/u-s-sales-database/") + 
  scale_x_continuous(name="Year", breaks = seq(1973, 2019, 5))+
  scale_y_continuous(breaks = seq(0,2000, 250), labels = label_number(suffix= "M"))+
  scale_fill_manual(values=colorRampPalette(brewer.pal(8, "Paired"))(18))+
  theme_minimal()+
  theme(legend.title = element_blank())

Recoding sales for digital/physical type

Next, we created additional variable presenting the type of each format (whether the format is a digital or physical one).

sales2 <- sales1 %>% mutate(Type = ifelse(Format %in% 
                                            c('Download Album', 'Download Music Video', 
                                              'Download Single', 'Ringtones & Ringbacks',
                                              'Kiosk'),
                                          'Digital', 'Physical'))

Plot 2: Comparison of digital vs physical recordings sold (1973-2019).

Now we were able to create the plot with the comparison of those two types in the analysed time period.

mypalette <- brewer.pal(3,"Dark2") #to ensure a proper color order

ggplot(data = sales2, aes(x = Year, y = Sales, fill = Type)) + 
  geom_col(position = "stack", width=1)+
  labs(y = 'Number of records sold', x = 'Year',
       title = 'Digital transformation in the music industry',
       caption = "Data source: RIAA https://www.riaa.com/u-s-sales-database/") + 
  scale_x_continuous(name="Year", breaks = seq(1973, 2019, 3))+
  scale_y_continuous(breaks = seq(0,2000, 250), labels = label_number(suffix= "M"))+
  annotate(geom="text", x=1993, y=300, label="Physical", color="black", fontface =2, size =7)+
  annotate(geom="text", x=2010, y=1000, label="Digital", color="black", fontface =2, size =7)+
  scale_fill_manual(values=mypalette[2:1]) +
  theme_minimal() +
  theme(legend.position = "none")

Aggregating sales dataset

For the purpose of creating a plot with the shares of digital recordings the data had to be aggregated with respect to the year and type of observation.

sales_agg <- sales2 %>% group_by(Year, Type) %>% summarise(TotSales = sum(Sales), SalesForYear2 = round(mean(SalesForYear),2))
sales_agg %>% tail()
## # A tibble: 6 x 4
## # Groups:   Year [3]
##    Year Type     TotSales SalesForYear2
##   <int> <chr>       <dbl>         <dbl>
## 1  2017 Digital     637.           743.
## 2  2017 Physical    106.           743.
## 3  2018 Digital     462.           532.
## 4  2018 Physical     70.5          532.
## 5  2019 Digital     386.           453.
## 6  2019 Physical     67.3          453.
sales_agg2 <- sales_agg %>% filter(Type == "Digital") %>% mutate(Share = TotSales/SalesForYear2*100)
sales_agg2
## # A tibble: 16 x 5
## # Groups:   Year [16]
##     Year Type    TotSales SalesForYear2 Share
##    <int> <chr>      <dbl>         <dbl> <dbl>
##  1  2004 Digital     144           958.  15.0
##  2  2005 Digital     553.         1302.  42.5
##  3  2006 Digital     940.         1588.  59.2
##  4  2007 Digital    1319          1852.  71.2
##  5  2008 Digital    1534.         1920.  79.9
##  6  2009 Digital    1515.         1828.  82.9
##  7  2010 Digital    1472.         1740.  84.6
##  8  2011 Digital    1569.         1825   86.0
##  9  2012 Digital    1591.         1803.  88.2
## 10  2013 Digital    1502.         1691.  88.8
## 11  2014 Digital    1304.         1458.  89.4
## 12  2015 Digital    1120.         1256.  89.2
## 13  2016 Digital     855.          970.  88.1
## 14  2017 Digital     637.          743.  85.8
## 15  2018 Digital     462.          532.  86.7
## 16  2019 Digital     386.          453.  85.1

Plot 3: Share of digital recordings (with respect to the total number of sold recordings).

ggplot(data = sales_agg2, aes(x = Year, y = Share)) + geom_col(fill = "#56B4E9") +
  geom_text(label = paste0(round(sales_agg2$Share,0),'%'), nudge_y = 2.2) + 
  labs(y = 'Share of digital recordings sold', x = 'Year',
       title = 'Changes in the sales structure with respect to digital formats',
       caption = "Data source: RIAA https://www.riaa.com/u-s-sales-database/") + 
  scale_x_continuous(name="Year", breaks = seq(2004, 2019, 1))+
  theme_minimal() +
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  labs(y = 'Share of digital records sold') +
  theme(legend.position="bottom")

Preparation of the revenues and merging both datasets

We imported and and prepared the data with the revenues part of the database in the same manner as the previous one.

rev <- read.csv2('data/Revenues.csv', dec =',', 
                 encoding =  'UTF-8')

rev1 <- rev %>% select(Year, Format, Value..Actual., Total.Value.For.Year) %>%
  rename(Revenues = Value..Actual., TotalYearRev = Total.Value.For.Year) %>%
  separate(TotalYearRev, into = c("tr1", "RevForYear", "tr2"), sep = c(2,-1), remove = FALSE) %>%
  select(Year, Format, Revenues, RevForYear) %>%
  mutate(RevForYear = as.numeric(gsub("," ,".", RevForYear)))

Again we recoded the data to obtain an information about the type of formats. This time beside digital and physical types, formats of steraming are also present in the data.

rev2 <- rev1 %>% mutate(Type = ifelse(Format %in% 
                                        c('8 - Track', 'Cassette', 
                                          'Cassette Single', 'CD',
                                          'CD Single', 'DVD Audio',
                                          'LP/EP', 'Music Video (Physical)', 
                                          'Other Tapes', 'Vinyl Single',
                                          'SACD'),
                                      'Physical', ifelse(Format %in% 
                                                           c('Download Album', 'Download Music Video',
                                                             'Download Single', 'Kiosk',
                                                             'Ringtones & Ringbacks', 'Other Digital'),
                                                         'Digital', 'Streaming')))

When the revenues part was ready we were able to merge both datasets and obtain a file with full information provided from the source.

data_all <- merge(sales2, rev2, by = c('Year', 'Format', 'Type'), all = T)

One more thing is the previously mentioned problem with the data for DVD Audio for year 2013. In case of revenues it is just a lack of data for this observation. Again we replaced it with a mean of respective adjacent observations.

data_all[data_all$Year == 2013 & data_all$Format == "DVD Audio",]$Revenues <- mean(c(data_all[data_all$Year %in% c(2012,2014) & data_all$Format == "DVD Audio",]$Revenues))

To simplify the analysis it may be usefull to reduce the number of main formats to some merged categories with particular respect to the technology used in the creation of the recording.

data_all$Category <-
  recode(
    data_all$Format,
    "c('8 - Track', 'Cassette', 'Cassette Single', 'Other Tapes') = 'Tapes';
    c('CD', 'CD Single', 'SACD') = 'CD';
    c('Download Album', 'Download Music Video', 'Download Single', 'Kiosk', 'Ringtones & Ringbacks', 'Other Digital') = 'Download';
    c('Limited Tier Paid Subscription', 'On-Demand Streaming (Ad-Supported)', 'Other Ad-Supported Streaming', 'Paid Subscription', 'SoundExchange Distributions') = 'Streaming';
    c('LP/EP', 'Vinyl Single') = 'Vinyl';"
  )

Having the fully prepared data we could aggregate it by new categories.

data_agg <- data_all %>% group_by(Year, Category) %>% summarise(Revenues = sum(Revenues, na.rm = T), Sales = sum(Sales, na.rm = T))

Plot 4: Changes in the revenue structure coming from the main recording formats

ggplot(data = data_agg,
       aes(x = Year, y = Revenues, alluvium = Category)) +
  geom_alluvium(aes(fill = Category, colour = Category),
                alpha = .75, decreasing = FALSE) +
  scale_x_continuous(breaks = seq(1973, 2019, 5)) +
  scale_y_continuous(breaks = seq(0, 15000, 5000), labels = label_number(suffix= "M")) +
  labs(y = 'Revenues from the records sold', x = 'Year',
       title = 'Changes in the revenues',
       caption = "Data source: RIAA https://www.riaa.com/u-s-sales-database/") + 
  theme_minimal() +
  scale_fill_brewer(type = "qual", palette = "Paired") +
  scale_color_brewer(type = "qual", palette = "Paired") +
  theme(legend.title = element_blank())

Plot 6: Lifecycle example of revenues generated from two consecutive major formats

6th plot presents the lifecycle of two dependent formats until 2003.

# data chosen to be showed on the plot
data_seg <- data_agg %>% 
  filter(Category %in% c("Vinyl", "Tapes")) %>%
  filter(Year<2004) %>%
  select(Year, Category, Revenues) %>%
  spread(key = Category, value=Revenues, -1)

tmp_date <- data_seg[data_seg$Year %in% c(seq(1973, 2003, 5)),]

# plot
ggplot(data = data_seg, aes(x = Vinyl, y = Tapes, label = Year)) +
  geom_point(color="#BC243C") +
  geom_segment(aes(xend = c(tail(Vinyl, n = -1), NA), 
                   yend = c(tail(Tapes, n = -1), NA)),
               arrow = arrow(length = unit(0.2,"cm"), type = 'closed'),
               color="#E15D44"
  ) +
  theme_minimal() +
  geom_text(data=tmp_date,
            color = "#5B5EA6",
            size = 4.8) +
  scale_x_continuous(breaks = seq(0, 4000, 1000), labels = label_number(suffix= "M")) +
  scale_y_continuous(breaks = seq(0, 4000, 1000), labels = label_number(suffix= "M")) +
  labs(title = "Lifecycle example of revenues from two consecutive major formats",
       x = "Vinyl", y = "Tapes",
       caption = "Data source: RIAA https://www.riaa.com/u-s-sales-database/")

Plot 7: Total revenues from different formats with respect to their types (additional)

Additionally we created the plot which shows the ranking of the total revenues obtained in years 1973-2019 from different categories of formats with respect to their types.

# calculating of total revenues
data_total <- data_all %>% 
  group_by(Format, Category, Type) %>%
  summarise(TotalRev = sum(Revenues, na.rm = T))

# plot
ggdotchart(data_total, x = "Format", y = "TotalRev", color = "Type",                               
           palette = c("#E47A2E", "#79C753", "#CD212A"), 
           sorting = "descending",                       
           rotate = TRUE,                                
           dot.size = 5,                                
           y.text.col = TRUE) + 
  theme_cleveland() +
  scale_y_continuous(breaks = seq(0, 200000, 50000), labels = label_number(suffix= "M")) +
  labs(title = "Total revenues by formats",
       y = "Total revenues",
       color = "Format type",
       caption = "Data source: RIAA https://www.riaa.com/u-s-sales-database/")

Plot 8: Share of total revenues from 1973-2019 coming from the most important formats

Share of total revenues, based on the main categories, showed above all the dominance of CD category over the rest. It is presented on the donut/vinyl plot below.

# calculating total revenues for the vinyl plot
data_total_vin <- data_agg2 %>% 
  group_by(Category2) %>%
  summarise(TotRev = sum(Revenues)) %>%
  mutate(fraction = TotRev / sum(TotRev),
         ymax = cumsum(fraction),
         ymin = c(0, head(ymax, n=-1)),
         labelPosition = (ymax + ymin) / 2,
         label = paste0(Category2, "\n", round(fraction*100, 1), "%"))

# plot
ggplot(data_total_vin, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=Category2)) +
  geom_rect() +
  geom_label(x=3.8, aes(y=labelPosition, label=label), size=3.2) +
  scale_fill_manual(values = brewer.pal(9,'GnBu')[c(2:9)]) +
  coord_polar(theta="y") +
  xlim(c(2.65, 4)) +
  theme_void() +
  theme(legend.position = "none",
        plot.title = element_text(size = 18)) +
  labs(title = "% of total revenues from years 1973-2019",
       caption = "Data source: RIAA https://www.riaa.com/u-s-sales-database/")

Plot 9: Changes of unitary revenues and impact of digital transformation (since 2004)

In the report we not only focused on the amount of sales and revenues but also calculated the appropriate average revenues per single music recording of the certain format for every year and analysed their relations with the changes on the music market. Below we presented the changes in unitary revenues with the special focus on the years from 2004.

# calculating revenues
data_agg <- data_agg %>%
  mutate(UnitRev = ifelse(Revenues == 0 | Sales == 0, NA, Revenues/Sales))

# plot
ggplot(data = data_agg[!data_agg$Category %in% c("Streaming", "Synchronization"),], aes(x = Year, y = UnitRev, group = Category)) +
  geom_line(aes(color = Category), linetype = 'F1', size = 1.3) +
  facet_zoom(x = Year > 2004) +
  scale_color_brewer(palette = "Dark2") +
  labs(title = "Changes of unitary revenues and impact of digital era since 2004",
       x = "Year", y = "Unitary revenues",
       caption = "Data source: RIAA https://www.riaa.com/u-s-sales-database/")

Plot 10: Changes in unitary revenues after the digital transformation

Emphasized period caused us to make a closer look at the size of changes of the unitary revenues in the recent years, presented on the 10th plot.

# data selection and transformation
data_slope <- data_agg[!data_agg$Category %in% c("Streaming", "Synchronization", "Tapes"),] %>% 
  filter(Year %in% c(2004, 2019)) %>%
  select(Year, Category, UnitRev) %>%
  spread(key = Year, value=UnitRev, 0)

left_label <- paste(data_slope$Category, round(data_slope$`2004`, 2),sep=", ")
right_label <- paste(data_slope$Category, round(data_slope$`2019`, 2),sep=", ")
data_slope$class <- ifelse((data_slope$`2019` - data_slope$`2004`) < 0, "red", "green")

# plot
ggplot(data_slope) + 
  geom_segment(aes(x = 1, xend = 2, y = `2004`, yend = `2019`, col = class), 
               size = .75, show.legend = F) + 
  geom_vline(xintercept = 1, linetype = "dashed", size = .1) + 
  geom_vline(xintercept = 2, linetype = "dashed", size = .1) +
  scale_color_manual(labels = c("Up", "Down"), 
                     values = c("green" = "#00A170", "red" = "#BC243C")) +
  xlim(.5, 2.5) + ylim(0, (1.1*(max(data_slope$`2004`, data_slope$`2019`)))) + 
  geom_text(label = left_label, y=data_slope$`2004`, x=rep(1, NROW(data_slope)), hjust=1.1, size=3.5) + 
  geom_text(label= right_label, y=data_slope$`2019`, x=rep(2, NROW(data_slope)), hjust=-0.1, size=3.5) + 
  geom_text(label="2004", x=1, y=1.1*(max(data_slope$`2004`, data_slope$`2019`)), hjust=1.2, size=5) + 
  geom_text(label="2019", x=2, y=1.1*(max(data_slope$`2004`, data_slope$`2019`)), hjust=-0.1, size=5) + 
  labs(title = "Changes in unitary revenues between years 2004 and 2019",
       caption = "Data source: RIAA https://www.riaa.com/u-s-sales-database/") +
  theme(plot.title = element_text(size = 18)) +
  theme_void()

Plot 11: Interactive ranking of unit revenues for different formats (additional)

The name of the format with the highest unitary revenues was changing much more dynamically than in case of number of the sales or the revenues. Below we presented the rank with the information about format’s place in this rank with the appropriate values of unitary and total revenues available after hovering the cursor over selected line and year.

# calculating unitary revenues for the whole, not-aggregated data
data_all <- data_all %>%
  mutate(UnitRev = ifelse(Revenues == 0 | Sales == 0, NA, Revenues/Sales))

# ranking data
data_rank <- data_all %>% 
  group_by(Year) %>% drop_na(UnitRev) %>%
  mutate(rank = order(order(UnitRev, decreasing=TRUE)))

# information to display
data_rank$tooltip <- paste(
  "Format: ", data_rank$Format,"<br/>", 
  "Unit revenues: ", round(data_rank$UnitRev, 2), "<br/>",
  "Total revenues: ", data_rank$Revenues, " mln<br/>",
  sep="") %>%
  sapply(htmltools::HTML)

# plot
g_int <- ggplot(data = data_rank, aes(x = Year, y = rank, group = Format)) +
  geom_line_interactive(aes(color = Format, alpha = -rank, tooltip = tooltip, data_id = Format), size = 2) +
  scale_y_reverse(breaks = seq(1, 13)) +
  scale_fill_manual(values=colorRampPalette(brewer.pal(8, "Paired"))(18)) +
  labs(title = "Ranking of unit revenues for different formats",
       x = "Year", y = "Ranking (the higher, the bigger unit revenues)",
       caption = "Data source: RIAA https://www.riaa.com/u-s-sales-database/") +
  theme(panel.grid.major = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "none"
  )

ggiraph(code = print(g_int), hover_css = "cursor:pointer;stroke:red;")

Plot 12: Dispersion of unitary revenues for different types of formats

Different behaviour of the different formats can be clearly seen on the plot below.

ggplot(data_agg[!data_agg$Category %in% c("Download", "Streaming", "Synchronization"),], aes(x = UnitRev, y = Category, fill = ..x..)) +
  geom_density_ridges_gradient(scale = 2, rel_min_height = 0.0005) +
  scale_fill_viridis(name = "Unitary revenues", option = "D") +
  theme_minimal() +
  labs(title = "Unitary revenue dispertion for different types of formats",
       x = "Unitary revenues", y = "Category",
       caption = "Data source: RIAA https://www.riaa.com/u-s-sales-database/") +
  theme(legend.position = 'none'
  )

Plot 13: Sources of revenue by recorded music type (additional)

Next we came back to the yearly revenues topic in order to present the changes in the revenues including the streaming formats, which could not be taken into consideration in the unitary revenues analysis. Despite that, many aspects should be considered with the awareness of streaming formats entering the stage in the recent years.

rev2 %>% group_by(Year, Type) %>% summarise(Sum = sum(Revenues)) %>% 
  mutate(Type= fct_relevel(Type, 
                           "Digital", "Streaming", "Physical")) %>% 
  ggplot(aes(x = Year, y = Sum, fill = Type)) +
  geom_col(position = "stack", width=1)+
  labs(y = 'Revenue generated', x = 'Year',
       title = 'Sources of revenue by recorded music type',
       caption = "Data source: RIAA https://www.riaa.com/u-s-sales-database/") + 
  scale_x_continuous(name="Year", breaks = seq(1973, 2019, 3))+
  scale_y_continuous(breaks = seq(0,15000, 2500), labels = label_number(prefix = "$",suffix= "M"))+
  scale_fill_manual(values=mypalette) +
  theme_minimal()

Plot 14: Changes in the revenue structure - including streaming (since 2005)

Shares of different format types in the total amount of yearly revenues since 2005 show how the changes of music eras proceeded and leave no doubts about the huge role the streaming has nowadays.

# aggregating data by types
merged <- data_all %>% group_by(Year, Type) %>%
  summarize(Sales = sum(Sales, na.rm = T), SalesYear = max(SalesForYear, na.rm = T),
            Revenues = sum(Revenues, na.rm = T), RevYear = max(RevForYear, na.rm = T)) %>%
  filter(Year > 2004) %>% 
  mutate(Percentage = Revenues/RevYear) %>% 
  mutate(Type= fct_relevel(Type, 
                           "Streaming", "Digital", "Physical"))

ggplot(merged, aes(x = Year, y = Percentage, fill = Type), show.legend = F)+
  geom_area(alpha = 0.6, size =1, colour = 'black')+
  labs(x = 'Year', y = "Percentage of revenue", 
       title = 'Changes in the revenue structure (since 2005)',
       caption = "Data source: RIAA https://www.riaa.com/u-s-sales-database/")+
  scale_y_continuous(labels = percent)+
  scale_x_continuous(name="Year", breaks = seq(2005, 2019, 1),limits=c(2005, 2019))+
  annotate(geom="text", x=2008, y=0.35, label="Physical", color="black", fontface =2, size =7)+
  annotate(geom="text", x=2012, y=0.65, label="Digital", color="black", fontface =2, size =7)+
  annotate(geom="text", x=2017, y=0.85, label="Streaming", color="black", fontface =2, size =7)+
  scale_fill_manual(values=mypalette[3:1])+
  theme_minimal() +
  theme(legend.position = "none")

Plot 15: Animated multiplot (additional)

Finally, we created an animated multiplot of number of sales divided by different categories of formats with respect to the total and unitary revenues to have a comprehensive view on the changes in relations of our variables.

# part 1: number of sales vs. revenues
p1 <- ggplot(data_agg[!data_agg$Category %in% c("Streaming", "Synchronization"),], aes(y = Revenues, x = Sales)) +
  geom_point(aes(color = Category), size = 3, show.legend = FALSE) +
  ggtitle("Year:") +
  scale_x_continuous(breaks = seq(0, 2000, 500), labels = label_number(suffix= "M")) +
  scale_y_continuous(breaks = seq(0, 15000, 5000), labels = label_number(suffix= "M")) +
  labs(y = "Revenues", x = "Number of Sales") +
  theme_minimal() +
  theme(plot.title = element_text(size = 22, hjust = 1),
        axis.title = element_text(size = 16),
        text = element_text(size = 14)) +
  scale_color_brewer(palette = "Dark2") +
  transition_states(Year) +
  shadow_mark(size = 2, past = T, future = T, alpha = 0.5)

p1_animate <- animate(p1, fps = 5) 

anim_save('p1.animate')

# part 2: number of sales vs. unitary revenues
p2 <- ggplot(data_agg[!data_agg$Category %in% c("Streaming", "Synchronization"),],
             aes(x = Sales, y = UnitRev)) +
  geom_point(aes(color = Category), size = 3) +
  ggtitle("{closest_state}") +
  scale_x_continuous(breaks = seq(0, 2000, 500), labels = label_number(suffix= "M")) +
  labs(y = "Unitary revenues", x = "Number of Sales") +
  theme_minimal() +
  theme(plot.title = element_text(size = 22, hjust = -0.1),
        axis.title = element_text(size = 16),
        text = element_text(size = 14)) +
  scale_color_brewer(palette = "Dark2") +
  transition_states(Year) +
  shadow_mark(size = 2, past = T, future = T, alpha = 0.5)

p2_animate <- animate(p2, fps = 5, width = 650) 

anim_save('p2.animate')
# loading and merging parts
p1_animated <- image_read('p1.animate')
p2_animated <- image_read('p2.animate')

anim_plot <- image_append(c(p1_animated[1], p2_animated[1]))

for(i in 2:100){
  combined <- image_append(c(p1_animated[i], p2_animated[i]))
  anim_plot <- c(anim_plot, combined)
}

# plot
anim_plot