Data Preprocessing

Importing the dataset

# Load the raw export; the relative path is resolved against the working
# directory.
data <- read.csv(file = "HEALTH_PHMC_24102017160155249.csv")

Filtering data with dplyr / tidyverse

# Keep only the rows whose Measure is "Million US$ at exchange rate", then
# reduce the table to three lower-case columns: year, country and value.
# The verbs are namespace-qualified so that they cannot silently resolve to
# stats::filter() or another package's select() depending on attach order.
data <- dplyr::filter(data, Measure == "Million US$ at exchange rate")
data <- dplyr::select(data, year = Year, country = Country, value = Value)

Creating a data frame with the worldwide revenue for each year

# Sum `value` across all countries for each year. aggregate() returns one row
# per distinct year, with columns named Group.1 and x.
totals <- aggregate(x = data$value, by = list(data$year), FUN = sum)
# Keep the first 35 yearly rows and give the columns meaningful names.
# NOTE(review): presumably the later years are dropped because they are
# incomplete -- confirm against the data.
totals <- setNames(totals[seq_len(35), ], c("year", "value"))

Creating a dummy data frame to use as the test set.

The test set is a placeholder data frame: only its year column (2015 to 2027) is used to form predictions, while its value column is filler.

# Thirteen future years to predict for; `value` is filler (1..13) that only
# gives the frame the same columns as `totals`.
test.set <- data.frame(
  year = seq(2015L, 2027L),
  value = seq_len(13)
)

Creating our linear regression model and providing it with the data to predict on

# Fit a simple linear regression of yearly total value on year.
regressor <- lm(formula = value ~ year, data = totals)
# Predict a value for every year in the test set and pair each prediction
# with its year. The years come from test.set itself rather than a second
# hard-coded range, so the two cannot drift apart.
value.pred <- data.frame(
  year = test.set$year,
  value = predict(regressor, newdata = test.set)
)

Combining predictions and original data into one data frame.

# Stack the yearly totals on top of the predicted rows; both frames have the
# columns year and value, so the predictions follow the totals row-wise.
total.pred <- rbind(
  totals,
  value.pred
)

Plotting our data

# Scatter plot of value against year with a linear trend line.
# Blue points: every row of total.pred.
# Red points: the predicted rows, drawn on top. They are taken from
# value.pred directly so the layer does not depend on hard-coded row
# positions inside total.pred; x and y are inherited from the base mapping.
# NOTE(review): geom_smooth() inherits total.pred, so the fit and its band
# are computed over the totals and the predicted rows together -- confirm
# that this is intended.
ggplot(data = total.pred, mapping = aes(x = year, y = value)) +
  geom_point(color = "Blue") +
  geom_smooth(method = "lm", color = "Green") +
  geom_point(data = value.pred, color = "Red") +
  labs(
    title = "Industry Growth over Time",
    x = "Year",
    y = "Value, Millions USD"
  ) +
  theme(
    title = element_text(size = 20),
    axis.title.x = element_text(color = "DarkBlue", size = 16),
    axis.title.y = element_text(color = "DarkBlue", size = 16)
  )

LS0tDQp0aXRsZTogIkxpbmVhciBSZWdyZXNzaW9uIGFuZCBHcm93dGggb2YgdGhlIFBoYXJtYWNldXRpY2FsIEluZHVzdHJ5Ig0KYXV0aG9yOiAiRGFuaWVsIEJyb3duIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCmBgYHtyIGVjaG8gPSBGQUxTRX0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KYGBgDQoNCiMgRGF0YSBQcmVwcm9jZXNzaW5nDQojIyBJbXBvcnRpbmcgdGhlIGRhdGFzZXQNCg0KYGBge3J9DQpkYXRhIDwtIHJlYWQuY3N2KCJIRUFMVEhfUEhNQ18yNDEwMjAxNzE2MDE1NTI0OS5jc3YiKQ0KYGBgDQoNCiMjIEZpbHRlcmluZyBkYXRhIHdpdGggZHBseXIgLyB0aWR5dmVyc2UNCmBgYHtyfQ0KZGF0YSA8LSBmaWx0ZXIoZGF0YSwgTWVhc3VyZSA9PSAiTWlsbGlvbiBVUyQgYXQgZXhjaGFuZ2UgcmF0ZSIpDQpkYXRhIDwtIHNlbGVjdChkYXRhLCB5ZWFyID0gWWVhciwgY291bnRyeSA9IENvdW50cnksIHZhbHVlID0gVmFsdWUpDQpgYGANCg0KIyMgQ3JlYXRpbmcgZGF0YSBmcmFtZSB3aXRoIHdvcmxkd2lkZSByZXZlbnVlIGVhY2ggeWVhcg0KYGBge3J9DQp0b3RhbHMgPC0gYWdncmVnYXRlKGRhdGEkdmFsdWUsIGJ5PWxpc3QoZGF0YSR5ZWFyKSwgRlVOID0gc3VtKQ0KdG90YWxzIDwtIHRvdGFsc1sxOjM1LF0NCm5hbWVzKHRvdGFscykgPC0gYygieWVhciIsICJ2YWx1ZSIpDQpgYGANCg0KIyMgQ3JlYXRpbmcgYSBkdW1teSBkYXRhIGZyYW1lIHRvIHVzZSBhcyB0aGUgdGVzdCBzZXQuDQpUaGUgdGVzdCBzZXQgaXMgYSBkdW1teSB2YXJpYWJsZSB0aGF0IHdlIHdpbGwgdXNlIHRvIGZvcm0gcHJlZGljdGlvbnMNCmBgYHtyfQ0KdGVzdC5zZXQgPC0gZGF0YS5mcmFtZSgieWVhciIgPSAyMDE1OjIwMjcsICJ2YWx1ZSIgPSAxOjEzKQ0KYGBgDQoNCiMjIENyZWF0aW5nIG91ciBsaW5lYXIgcmVncmVzc2lvbiBtb2RlbCBhbmQgcHJvdmlkaW5nIGl0IGRhdGEgdG8gcHJlZGljdA0KYGBge3J9DQpyZWdyZXNzb3IgPC0gbG0oZm9ybXVsYSA9IHZhbHVlIH4geWVhciwgZGF0YSA9IHRvdGFscykNCnZhbHVlLnByZWQgPC0gcHJlZGljdChyZWdyZXNzb3IsIHRlc3Quc2V0KQ0KdmFsdWUucHJlZCA8LSBkYXRhLmZyYW1lKCJ5ZWFyIiA9IDIwMTU6MjAyNywgInZhbHVlIiA9IHZhbHVlLnByZWQpDQpgYGANCg0KIyMgQ29tYmluaW5nIHByZWRpY3Rpb25zIGFuZCBvcmlnaW5hbCBkYXRhIGludG8gb25lIGRhdGEgZnJhbWUuDQpgYGB7cn0NCnRvdGFsLnByZWQgPC0gcmJpbmQodG90YWxzLCB2YWx1ZS5wcmVkKQ0KYGBgDQoNCiMjIFBsb3R0aW5nIG91ciBkYXRhIA0KYGBge3J9DQpnZ3Bsb3QoZGF0YSA9IHRvdGFsLnByZWQpICsgYWVzKHggPSB5ZWFyLCB5ID0gdmFsdWUpICsgDQogIGdlb21fcG9pbnQoY29sb3IgPSAiQmx1ZSIpICsgDQogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIsIGNvbG9yID0gIkdyZWVuIikgKyANCiAgZ2VvbV9wb2ludChkYXRhID0gdG90YWwucHJlZFszNjo0OCxdLCBhZXMoeCA9IHllYXIsIHkgPSB2YWx1ZSks
IA0KICAgICAgICAgICAgIGNvbG9yID0gIlJlZCIpICsNCiAgZ2d0aXRsZSgiSW5kdXN0cnkgR3Jvd3RoIG92ZXIgVGltZSIpICsNCiAgeGxhYigiWWVhciIpICsgeWxhYignVmFsdWUsIE1pbGxpb25zIFVTRCcpICsNCiAgdGhlbWUodGl0bGUgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDIwKSwNCiAgICAgICAgYXhpcy50aXRsZS54ID0gZWxlbWVudF90ZXh0KGNvbG9yID0gIkRhcmtCbHVlIiwgc2l6ZSA9IDE2KSwNCiAgICAgICAgYXhpcy50aXRsZS55ID0gZWxlbWVudF90ZXh0KGNvbG9yID0gIkRhcmtCbHVlIiwgc2l6ZSA9IDE2KSkNCmBgYA0K