We are provided with a csv file containing annual financial data for firms. We are going to utilize 3 columns name fyear = fiscal year; tic = firm ID, ni = net income.
For this we will be calculating the percent by which average annual net income decreases for firms during crisis years (2007-8) compared to pre-crisis years (2004-6)
Step 1 : Load the data and filter it .
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data = read.csv("V:/bkp N/FINC/Week4/Compustat 1990-2015 Lots.csv")
data <- data %>% select('fyear','tic','ni')
dataForCrisis <- data %>% filter(ni != 'NA', ni != 0 ,fyear >= 2004 & fyear <= 2008)
dataForCrisis <- dataForCrisis %>% group_by(dataForCrisis$tic) %>% filter(all(c(2004:2008 %in% dataForCrisis$fyear))) %>% ungroup
Step 2 :I have seperated the data in two data frames for pre-crisis and post crisis . After summerising the data I will merge both of them to calculate percentage change. For summarising the data we are calculating the annual net average income for the pre crisis and post crisis which are denoted by ‘ANIPRE’ and ‘ANIPOST’ respectively.
dfPre <- dataForCrisis %>% filter(fyear >= 2004 & fyear <= 2006)
PreCrisisData <- dfPre %>% group_by(tic) %>% summarise(ANIPRE =mean(ni))
head(PreCrisisData,5)
## # A tibble: 5 x 2
## tic ANIPRE
## <chr> <dbl>
## 1 "" -2.03
## 2 "0015B" 41.7
## 3 "0030B" 19.4
## 4 "0032A" 660
## 5 "0033A" 1305
dfPost <- dataForCrisis %>% filter(fyear >= 2007 & fyear <= 2008)
PostCrisisData <- dfPost %>% group_by(tic) %>% summarise(ANIPOST =mean(ni) )
head(PostCrisisData,5)
## # A tibble: 5 x 2
## tic ANIPOST
## <chr> <dbl>
## 1 "" -3.14
## 2 "0015B" -120.
## 3 "0030B" 9.87
## 4 "0032A" -4500.
## 5 "0033A" -5238.
Step 3 :Merging the two datframes of pre and post crisis and calculating the percentage change
finalCrisDf <- inner_join(PreCrisisData , PostCrisisData , by = c("tic"))
finalCrisDf$pchng = ((finalCrisDf$ANIPOST - finalCrisDf$ANIPRE)/abs(finalCrisDf$ANIPRE))*100
head(finalCrisDf,5)
## # A tibble: 5 x 4
## tic ANIPRE ANIPOST pchng
## <chr> <dbl> <dbl> <dbl>
## 1 "" -2.03 -3.14 -54.6
## 2 "0015B" 41.7 -120. -389.
## 3 "0030B" 19.4 9.87 -49.0
## 4 "0032A" 660 -4500. -782.
## 5 "0033A" 1305 -5238. -501.
Analysis : Based on the values it can be concluded that the net income has a very steep decline in its value . This crisis was result of crash of stock market which expanded globally . Net income contributes to a company’s assets and can therefore affect the book value . Thus it can be concluded that due to crisis there must be certain cost cutting which multiple firms might have considered like firing employees, shutting down offices , changing the value of utilities , changing cost price and affecting the real estates business .
dataMergeAbs <- inner_join(PreCrisisData , PostCrisisData , by = c("tic"))
dataMergeAbs$AbsNi = abs(dataMergeAbs$ANIPOST - dataMergeAbs$ANIPRE)
head(dataMergeAbs,5)
## # A tibble: 5 x 4
## tic ANIPRE ANIPOST AbsNi
## <chr> <dbl> <dbl> <dbl>
## 1 "" -2.03 -3.14 1.11
## 2 "0015B" 41.7 -120. 162.
## 3 "0030B" 19.4 9.87 9.49
## 4 "0032A" 660 -4500. 5160.
## 5 "0033A" 1305 -5238. 6542.
dataAbsLargestChanges <- dataMergeAbs %>% arrange(desc(AbsNi)) %>% select(tic,AbsNi) %>% top_n(10)
## Selecting by AbsNi
print(dataAbsLargestChanges)
## # A tibble: 10 x 2
## tic AbsNi
## <chr> <dbl>
## 1 AIG 57999.
## 2 FNMA 35503.
## 3 C 33091.
## 4 GM 31549
## 5 VOD 29826.
## 6 FMCC 29032.
## 7 BAC2 23378.
## 8 UBS 21383.
## 9 RBS 19925.
## 10 S 16889.
dataAbsSmallestChanges <- dataMergeAbs %>% arrange(AbsNi) %>% select(tic,AbsNi) %>% top_n(-10)
## Selecting by AbsNi
print(dataAbsSmallestChanges)
## # A tibble: 10 x 2
## tic AbsNi
## <chr> <dbl>
## 1 MDJT.1 0
## 2 AMRB 0.000167
## 3 ASOE 0.000333
## 4 CEHC 0.000833
## 5 VSYS 0.00100
## 6 ANML.1 0.00117
## 7 VODG 0.00117
## 8 IHT 0.00133
## 9 ORRMF 0.00233
## 10 AXRX 0.0035
finalCrisDf2 <- inner_join(PreCrisisData , PostCrisisData , by = c("tic"))
finalCrisDf2$pchng = abs(((finalCrisDf2$ANIPOST - finalCrisDf2$ANIPRE)/ abs(finalCrisDf2$ANIPRE))*100)
head(finalCrisDf2,3)
## # A tibble: 3 x 4
## tic ANIPRE ANIPOST pchng
## <chr> <dbl> <dbl> <dbl>
## 1 "" -2.03 -3.14 54.6
## 2 "0015B" 41.7 -120. 389.
## 3 "0030B" 19.4 9.87 49.0
LargestAbsPerChng <- finalCrisDf2 %>% arrange(pchng) %>% select(tic,pchng) %>% top_n(10)
## Selecting by pchng
head(LargestAbsPerChng,10)
## # A tibble: 10 x 2
## tic pchng
## <chr> <dbl>
## 1 FSCI 6.08e 4
## 2 3WCPSF 6.17e 4
## 3 CBEY 6.29e 4
## 4 DEK 6.97e 4
## 5 RAE 6.98e 4
## 6 0419B 9.59e 4
## 7 ENTN 9.90e 4
## 8 NHLD 1.34e 5
## 9 STAQ 1.42e 5
## 10 PGUS 1.89e19
SmallestAbsPerChng <- finalCrisDf2 %>% arrange(pchng) %>% select(tic,pchng) %>% top_n(-10)
## Selecting by pchng
head(SmallestAbsPerChng,10)
## # A tibble: 10 x 2
## tic pchng
## <chr> <dbl>
## 1 MDJT.1 0
## 2 AMRB 0.00208
## 3 UNT 0.0113
## 4 GXP 0.0427
## 5 SKH 0.0585
## 6 MCO 0.0661
## 7 CEHC 0.0896
## 8 WASH 0.107
## 9 KHD.Z 0.108
## 10 GAXIQ 0.133
finalCrisDf3 <- finalCrisDf %>% filter(pchng < 0)
smallstPerChng <- finalCrisDf3 %>% arrange(desc(pchng)) %>% select(tic,pchng) %>% top_n(10)
## Selecting by pchng
print(smallstPerChng)
## # A tibble: 10 x 2
## tic pchng
## <chr> <dbl>
## 1 UNT -0.0113
## 2 GXP -0.0427
## 3 SKH -0.0585
## 4 MCO -0.0661
## 5 GAXIQ -0.133
## 6 VLCM -0.142
## 7 ETCC -0.154
## 8 VCBI -0.167
## 9 TRNS -0.229
## 10 ASGR -0.235
largestPerChng <- finalCrisDf3 %>% arrange(pchng) %>% select(tic,pchng) %>% top_n(-10)
## Selecting by pchng
print(largestPerChng)
## # A tibble: 10 x 2
## tic pchng
## <chr> <dbl>
## 1 PGUS -1.89e19
## 2 NHLD -1.34e 5
## 3 ENTN -9.90e 4
## 4 0419B -9.59e 4
## 5 RAE -6.98e 4
## 6 DEK -6.97e 4
## 7 3WCPSF -6.17e 4
## 8 BCAS -6.04e 4
## 9 SUPR -5.87e 4
## 10 DGNG -5.44e 4
So we will be taking maximum value of net income during pre crisis year and find out the duration after crisis when its highest value was breached.
Step 1: Filtering and cleaning the data
dataForCrisis3 <- data %>% filter(fyear >= 2004 & fyear <= 2014)
df5 <- dataForCrisis3 %>% group_by(tic) %>% filter(all(c(2004:2014 %in% fyear))) %>% ungroup
head(df5,3)
## # A tibble: 3 x 3
## fyear tic ni
## <int> <chr> <dbl>
## 1 2004 AIR 15.5
## 2 2005 AIR 35.2
## 3 2006 AIR 58.7
Step 2: Calculate the maximum value of net income in pre crisis year
dfPre <- df5 %>% filter(fyear >= 2004 & fyear <= 2006)
PreCrisisData2 <- dfPre %>% group_by(tic) %>% summarise( MaxNI = max(ni))
PreCrisisData2 <- PreCrisisData2 %>% filter(MaxNI != "NA")
head(PreCrisisData2,5)
## # A tibble: 5 x 2
## tic MaxNI
## <chr> <dbl>
## 1 0033A 2552
## 2 0048A 139.
## 3 0051A 6.33
## 4 0070A 11.8
## 5 0071A 134.
Step 3 : Restructing the data and summarizing it to find the duration the firms have taken to recover from financial crisis . I have added the column recovery in order to seperate two data frames (1 : recovered , 2: not recovered) . Then I have seprated the two data frames . In the recovery one I have calculated the minimum year for which the value of recovery column is 1 for each firms and then calculated the difference between 2008 and the min year (YearsForRecovery ) . In the another data frame I have just selected the distinct value of firms and appended it with a value 0 for YearsForRecovery . Then I have combined both the dataframes and used aggregate function . In this way we won’t get any duplicate records and we will get the data of all firms who have recovered and not recovered from the crisis .
df5 <- df5 %>% filter(fyear>2008)
finalnewDf <- inner_join(df5 , PreCrisisData2 , by = c("tic"))
finalnewDf$f08 = 2008
head(finalnewDf,5)
## # A tibble: 5 x 5
## fyear tic ni MaxNI f08
## <int> <chr> <dbl> <dbl> <dbl>
## 1 2009 AIR 44.6 58.7 2008
## 2 2010 AIR 69.8 58.7 2008
## 3 2011 AIR 67.7 58.7 2008
## 4 2012 AIR 55 58.7 2008
## 5 2013 AIR 72.9 58.7 2008
finalnewDf2 <- finalnewDf %>% mutate(recovery = ifelse( ni >= MaxNI , 1 , 0 ))
head(finalnewDf2,5)
## # A tibble: 5 x 6
## fyear tic ni MaxNI f08 recovery
## <int> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2009 AIR 44.6 58.7 2008 0
## 2 2010 AIR 69.8 58.7 2008 1
## 3 2011 AIR 67.7 58.7 2008 1
## 4 2012 AIR 55 58.7 2008 0
## 5 2013 AIR 72.9 58.7 2008 1
finalnewDf2Y <- finalnewDf2 %>% filter(recovery == 1)
finalnewDf2N <- finalnewDf2 %>% filter(recovery == 0)
finalnewDf2YSummary <- finalnewDf2Y %>% group_by(tic) %>% summarise(YearsForRecovery = min(fyear)-2008)
head(finalnewDf2YSummary,5)
## # A tibble: 5 x 2
## tic YearsForRecovery
## <chr> <dbl>
## 1 0048A 3
## 2 0070A 1
## 3 0071A 5
## 4 0100A 2
## 5 0123A 4
finalnewDf2NSummary <- distinct(finalnewDf2N, tic)
finalnewDf2NSummary$YearsForRecovery = 0
head(finalnewDf2NSummary,5)
## # A tibble: 5 x 2
## tic YearsForRecovery
## <chr> <dbl>
## 1 AIR 0
## 2 AAL 0
## 3 CECE 0
## 4 AVX 0
## 5 PNW 0
finalData <- rbind(finalnewDf2YSummary ,finalnewDf2NSummary )
dataForRecoveryCrisis <- aggregate(finalData$YearsForRecovery, by = list(finalData$tic), FUN = sum)
dataForRecoveryCrisis <- dataForRecoveryCrisis %>% mutate(YearsRecover = ifelse(x > 0 , x ,'NA'))
dataForRecoveryCrisis = rename(dataForRecoveryCrisis , "tic" = Group.1 )
dataForRecoveryCrisis <- subset(dataForRecoveryCrisis , select = -x)
head(dataForRecoveryCrisis ,10)
## tic YearsRecover
## 1 0033A NA
## 2 0048A 3
## 3 0051A NA
## 4 0070A 1
## 5 0071A 5
## 6 0079A NA
## 7 0084A NA
## 8 0100A 2
## 9 0123A 4
## 10 0124A NA
dataForRecoveryCrisisSum <- dataForRecoveryCrisis %>% filter(YearsRecover != 'NA') %>% mutate(yearNum = as.numeric(YearsRecover))
dataForRecoveryCrisisSum <- subset(dataForRecoveryCrisisSum , c= - YearsRecover)
dataForRecoveryCrisisSum <- dataForRecoveryCrisisSum %>% summarise(mean_dd = mean(yearNum), sd_dd = sd(yearNum), n = n())
print(dataForRecoveryCrisisSum)
## mean_dd sd_dd n
## 1 2.146608 1.444468 2742
There are many companies who took long time to recover . Based on standard mean it can be concluded that after crisis it almost took 2.25 years for company to breach its highest level during which was calculated during pre crisis period .