# Silence all warnings for the rest of the session (negative `warn` = ignore).
# NOTE(review): this also hides warnings from the data-import steps below;
# confirm that is intended rather than a leftover from report rendering.
options(warn = -1)
library(ggplot2)
library(TSA)
library(readxl)
library(stringi)
library(zoo)
# Project root; the relative 'Data/...' paths below are resolved against it.
# NOTE(review): absolute, machine-specific path combined with setwd() -- the
# script will only run unchanged on a machine with this exact directory.
path <- '/Users/mareksalamon/Desktop/School/Hunter/Spring Semester 2019/Time Series Analysis (715)/Final Project'
setwd(path)

# Aggregated NYC series: read cells A2:C98 (first row of the range is the
# header) as one text column followed by two numeric columns.
# na = '0' tells readxl to treat "0" as a missing-value marker.
data.nyc <- read_xlsx(
  'Data/total_tech_invs_nyc.xlsx',
  range = 'A2:C98',
  col_names = TRUE,
  col_types = c('text', 'numeric', 'numeric'),
  na = '0',
  trim_ws = TRUE
)

# Parse the quarter labels with the "Q%q %Y" pattern and store them as Dates.
data.nyc$Quarter <- as.Date(as.yearqtr(data.nyc$Quarter, format = "Q%q %Y"))

# Second data set (the preview below shows it carries a Sector column);
# print its first rows as a quick sanity check.
data <- read.csv('Data/all_tech_invs_nyc.csv')
head(data)
## Quarter Number.of.deals Amounts Sector
## 1 1995-01-01 0 0 agriculture
## 2 1995-04-01 0 0 agriculture
## 3 1995-07-01 0 0 agriculture
## 4 1995-10-01 0 0 agriculture
## 5 1996-01-01 0 0 agriculture
## 6 1996-04-01 0 0 agriculture
For simplicity, we will look at the total amount of quarterly investment across all 20 sectors.
# Preview the first six rows of the aggregated NYC series
head(data.nyc, n = 6L)
## # A tibble: 6 x 3
## Quarter `Number of deals` Amounts
## <date> <dbl> <dbl>
## 1 1995-01-01 33 128280000
## 2 1995-04-01 22 108300000
## 3 1995-07-01 20 74240000
## 4 1995-10-01 22 74260000
## 5 1996-01-01 27 163330000
## 6 1996-04-01 25 166550000
Let’s take a look at the five quarters with the largest and the five quarters with the smallest total investment between 1995 and 2018.
# Five quarters with the largest total amount: sort rows by Amounts in
# descending order (negated key), then keep the first five.
data.nyc[order(-data.nyc$Amounts), ][seq_len(5), ]
## # A tibble: 5 x 3
## Quarter `Number of deals` Amounts
## <date> <dbl> <dbl>
## 1 2018-07-01 163 5806640000
## 2 2017-07-01 220 4526000000
## 3 2000-01-01 239 3983720000
## 4 2000-07-01 205 3379210000
## 5 2017-10-01 217 2978320000
# Five quarters with the smallest total amount: sort rows by Amounts in
# ascending order, then keep the first five.
data.nyc[order(data.nyc$Amounts), ][seq_len(5), ]
## # A tibble: 5 x 3
## Quarter `Number of deals` Amounts
## <date> <dbl> <dbl>
## 1 1995-07-01 20 74240000
## 2 1995-10-01 22 74260000
## 3 1995-04-01 22 108300000
## 4 1995-01-01 33 128280000
## 5 1996-07-01 33 129000000
# Correlogram: sample autocorrelation of the Amounts column, wrapped in a
# plain ts object (no start/frequency given, so lags count observations).
acf(x = ts(data.nyc$Amounts), main = "Correlogram")