# Session setup ----
# The workspace is deliberately not wiped with rm(list = ls()): every object
# used below is created by this script, and clearing would delete the objects
# of whoever sources this file without giving a genuinely fresh session.

# Folder that holds the .dta file on the author's machine. Only switch to it
# when it exists, so the script does not abort with "cannot change working
# directory" elsewhere; on other machines, run the script from the folder
# that contains the data file.
assignment_dir <- file.path(
  "C:/Users/AKASH/Desktop/Term IV",
  "Applied Econometrics for managers/Assignment 2"
)
if (dir.exists(assignment_dir)) {
  setwd(assignment_dir)
}

library(psych) 
library(foreign) 
library(haven)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
# Load the Stata dataset; the relative path is resolved against the current
# working directory.
mydata1 <- read_dta(file = "macchiavello_morjaria_aer_2015.dta")

# (a) Rename the "age" variable to "age_rel".
names(mydata1) <- replace(names(mydata1), names(mydata1) == "age", "age_rel")

# Summary statistics for the variables used in the analysis.
options(scipen = 999)  # strong bias towards fixed (non-exponential) printing
myvars <- c(
  "weekq", "price", "age_rel", "conflict", "new", "period", "pastinter"
)
mydata2 <- mydata1[, myvars, drop = FALSE]
describe(mydata2)
##           vars    n   mean     sd median trimmed    mad min     max   range
## weekq        1 1827 106.18 183.38  35.68   61.44  44.59 0.2 1694.39 1694.19
## price        2 1826  10.95   8.22   8.90    9.40   3.86 1.0   60.00   59.00
## age_rel      3 1940 599.49 410.33 549.00  575.90 524.10 1.0 1430.00 1429.00
## conflict     4 2548   0.53   0.50   1.00    0.53   0.00 0.0    1.00    1.00
## new          5 2548   0.30   0.46   0.00    0.25   0.00 0.0    1.00    1.00
## period       6 2548   0.92   0.80   1.00    0.90   1.48 0.0    2.00    2.00
## pastinter    7 2548 161.07 226.07  65.50  112.36  97.11 0.0 1341.00 1341.00
##            skew kurtosis   se
## weekq      3.43    15.49 4.29
## price      2.87    10.61 0.19
## age_rel    0.38    -1.00 9.32
## conflict  -0.10    -1.99 0.01
## new        0.89    -1.21 0.01
## period     0.15    -1.40 0.02
## pastinter  2.07     4.59 4.48
# Keep only the rows with no missing value in any selected variable, then add
# the squared price that enters the regressions below as `pricesq`.
mydata3 <- na.omit(mydata2)
mydata3[["pricesq"]] <- mydata3[["price"]]^2
describe(mydata3)
##           vars    n   mean     sd median trimmed    mad min     max   range
## weekq        1 1826 106.23 183.42  35.75   61.48  44.60 0.2 1694.39 1694.19
## price        2 1826  10.95   8.22   8.90    9.40   3.86 1.0   60.00   59.00
## age_rel      3 1826 603.76 415.19 550.00  580.71 532.99 1.0 1430.00 1429.00
## conflict     4 1826   0.54   0.50   1.00    0.55   0.00 0.0    1.00    1.00
## new          5 1826   0.21   0.41   0.00    0.14   0.00 0.0    1.00    1.00
## period       6 1826   1.05   0.81   1.00    1.06   1.48 0.0    2.00    2.00
## pastinter    7 1826 209.90 240.55 120.00  164.03 146.41 0.0 1341.00 1341.00
## pricesq      8 1826 187.46 400.10  79.22   98.37  64.08 1.0 3600.00 3599.00
##            skew kurtosis   se
## weekq      3.43    15.48 4.29
## price      2.87    10.61 0.19
## age_rel    0.37    -1.04 9.72
## conflict  -0.17    -1.97 0.01
## new        1.40    -0.05 0.01
## period    -0.09    -1.45 0.02
## pastinter  1.80     3.35 5.63
## pricesq    5.37    34.38 9.36
# (b) Estimate the following relationships by ordinary least squares and
# report the results (coefficient table along with the adjusted R-squared).

# Model 1: weekq = B0 + B1*price + B2*pricesq + B3*age_rel + B4*new + error
reg1 <- lm(
  formula = weekq ~ price + pricesq + age_rel + new,
  data = mydata3
)
summary(reg1)
## 
## Call:
## lm(formula = weekq ~ price + pricesq + age_rel + new, data = mydata3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -179.16  -94.09  -44.06   12.72 1580.76 
## 
## Coefficients:
##              Estimate Std. Error t value             Pr(>|t|)    
## (Intercept) 160.36645   12.52959  12.799 < 0.0000000000000002 ***
## price        -6.99344    1.43084  -4.888           0.00000111 ***
## pricesq       0.08611    0.02932   2.937              0.00336 ** 
## age_rel       0.04346    0.01011   4.299           0.00001809 ***
## new         -93.34474   10.20531  -9.147 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 175.7 on 1821 degrees of freedom
## Multiple R-squared:  0.08489,    Adjusted R-squared:  0.08288 
## F-statistic: 42.23 on 4 and 1821 DF,  p-value: < 0.00000000000000022
# (c) Add the conflict variable and estimate again.
# Model 2: weekq = B0 + B1*price + B2*pricesq + B3*age_rel + B4*new +
#                  B5*conflict + error
reg2 <- lm(
  formula = weekq ~ price + pricesq + age_rel + new + conflict,
  data = mydata3
)
summary(reg2)
## 
## Call:
## lm(formula = weekq ~ price + pricesq + age_rel + new + conflict, 
##     data = mydata3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -198.62  -86.24  -44.88   23.75 1551.83 
## 
## Coefficients:
##               Estimate Std. Error t value             Pr(>|t|)    
## (Intercept) 117.949973  13.624253   8.657 < 0.0000000000000002 ***
## price        -5.781914   1.419872  -4.072    0.000048581226065 ***
## pricesq       0.077755   0.028921   2.689             0.007242 ** 
## age_rel       0.038867   0.009984   3.893             0.000103 ***
## new         -93.256329  10.059015  -9.271 < 0.0000000000000002 ***
## conflict     61.611653   8.356670   7.373    0.000000000000252 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 173.1 on 1820 degrees of freedom
## Multiple R-squared:  0.1114, Adjusted R-squared:  0.109 
## F-statistic: 45.65 on 5 and 1820 DF,  p-value: < 0.00000000000000022
# (d) Interact "conflict" and "new".
# Model 3: weekq = B0 + B1*price + B2*pricesq + B3*age_rel + B4*new +
#                  B5*conflict + B6*(conflict x new) + error
reg3 <- lm(
  formula = weekq ~ price + pricesq + age_rel + new + conflict +
    new * conflict,
  data = mydata3
)
summary(reg3)
## 
## Call:
## lm(formula = weekq ~ price + pricesq + age_rel + new + conflict + 
##     new * conflict, data = mydata3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -208.86  -84.88  -34.68   13.46 1544.14 
## 
## Coefficients:
##                Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  103.006656  13.977619   7.369    0.000000000000259 ***
## price         -5.337128   1.416436  -3.768              0.00017 ***
## pricesq        0.064209   0.028942   2.219              0.02664 *  
## age_rel        0.042750   0.009974   4.286    0.000019136171475 ***
## new          -46.657821  14.592761  -3.197              0.00141 ** 
## conflict      79.852935   9.296292   8.590 < 0.0000000000000002 ***
## new:conflict -87.600119  19.963145  -4.388    0.000012090771288 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 172.3 on 1819 degrees of freedom
## Multiple R-squared:  0.1207, Adjusted R-squared:  0.1178 
## F-statistic: 41.63 on 6 and 1819 DF,  p-value: < 0.00000000000000022
# (e) Convert the 'period' variable into dummy variables.
# dummy.code() returns one indicator column per period value. Together with
# the regression intercept that full set is perfectly collinear (the dummy
# variable trap), which is why lm() reported an NA coefficient for
# dummy_season0. Drop the period-0 column so that period 0 is the explicit
# baseline; the remaining coefficients keep the names dummy_season1 and
# dummy_season2 and the same estimates. Console output pasted further down
# that still lists a dummy_season0 row predates this change.
mydata3$dummy_season <- local({
  period_dummies <- dummy.code(mydata3$period)
  # Columns are named after the period values ("1", "2", "0").
  period_dummies[, colnames(period_dummies) != "0", drop = FALSE]
})

# (f) Re-estimate the interaction model with the season dummies added.
reg4 <- lm(
  formula = weekq ~ price + pricesq + age_rel + new + conflict +
    new * conflict + dummy_season,
  data = mydata3
)
summary(reg4)
## 
## Call:
## lm(formula = weekq ~ price + pricesq + age_rel + new + conflict + 
##     new * conflict + dummy_season, data = mydata3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -219.13  -82.43  -34.35   13.66 1529.93 
## 
## Coefficients: (1 not defined because of singularities)
##                Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)    95.04853   14.68852   6.471       0.000000000125 ***
## price          -5.61692    1.41956  -3.957       0.000078882120 ***
## pricesq         0.06857    0.02897   2.367             0.018034 *  
## age_rel         0.03867    0.01009   3.831             0.000132 ***
## new           -48.84883   14.59940  -3.346             0.000837 ***
## conflict       79.69012    9.28476   8.583 < 0.0000000000000002 ***
## dummy_season1  11.39624   10.05195   1.134             0.257056    
## dummy_season2  26.05999   10.20202   2.554             0.010718 *  
## dummy_season0        NA         NA      NA                   NA    
## new:conflict  -86.31575   19.94416  -4.328       0.000015874890 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 172.1 on 1817 degrees of freedom
## Multiple R-squared:  0.1239, Adjusted R-squared:  0.1201 
## F-statistic: 32.13 on 8 and 1817 DF,  p-value: < 0.00000000000000022
# (g) Include the 'pastinter' variable and estimate the relationship again.
reg5 <- lm(
  formula = weekq ~ price + pricesq + age_rel + new + conflict +
    new * conflict + dummy_season + pastinter,
  data = mydata3
)
summary(reg5)
## 
## Call:
## lm(formula = weekq ~ price + pricesq + age_rel + new + conflict + 
##     new * conflict + dummy_season + pastinter, data = mydata3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -334.98  -72.75  -28.02   29.02 1500.96 
## 
## Coefficients: (1 not defined because of singularities)
##                 Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)    61.429124  12.934469   4.749   0.0000022018340180 ***
## price          -0.882291   1.258546  -0.701               0.4834    
## pricesq         0.002613   0.025506   0.102               0.9184    
## age_rel        -0.149959   0.011916 -12.584 < 0.0000000000000002 ***
## new            -8.629390  12.890983  -0.669               0.5033    
## conflict       63.114844   8.156533   7.738   0.0000000000000166 ***
## dummy_season1   8.111196   8.798742   0.922               0.3567    
## dummy_season2  19.492689   8.933327   2.182               0.0292 *  
## dummy_season0         NA         NA      NA                   NA    
## pastinter       0.503333   0.021345  23.581 < 0.0000000000000002 ***
## new:conflict  -29.449533  17.621280  -1.671               0.0948 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 150.6 on 1816 degrees of freedom
## Multiple R-squared:  0.3293, Adjusted R-squared:  0.326 
## F-statistic: 99.06 on 9 and 1816 DF,  p-value: < 0.00000000000000022
# (h) Interact the 'pastinter' variable with the 'conflict' variable and
# estimate the model again.
reg6 <- lm(
  formula = weekq ~ price + pricesq + age_rel + new + conflict +
    new * conflict + dummy_season + pastinter + conflict * pastinter,
  data = mydata3
)
summary(reg6)
## 
## Call:
## lm(formula = weekq ~ price + pricesq + age_rel + new + conflict + 
##     new * conflict + dummy_season + pastinter + conflict * pastinter, 
##     data = mydata3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -368.19  -67.58  -28.81   28.67 1510.83 
## 
## Coefficients: (1 not defined because of singularities)
##                      Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)         76.161438  13.737761   5.544         0.0000000339 ***
## price               -0.996935   1.256056  -0.794             0.427472    
## pricesq              0.002901   0.025445   0.114             0.909246    
## age_rel             -0.151044   0.011893 -12.701 < 0.0000000000000002 ***
## new                -18.499043  13.242203  -1.397             0.162591    
## conflict            38.224916  11.386782   3.357             0.000804 ***
## dummy_season1        8.097088   8.777589   0.922             0.356405    
## dummy_season2       19.708230   8.912116   2.211             0.027133 *  
## dummy_season0              NA         NA      NA                   NA    
## pastinter            0.447535   0.027790  16.104 < 0.0000000000000002 ***
## new:conflict        -9.995770  18.648820  -0.536             0.592024    
## conflict:pastinter   0.099079   0.031708   3.125             0.001808 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 150.2 on 1815 degrees of freedom
## Multiple R-squared:  0.3329, Adjusted R-squared:  0.3292 
## F-statistic: 90.56 on 10 and 1815 DF,  p-value: < 0.00000000000000022
# Side-by-side text table of all six models, keeping only the number of
# observations and the adjusted R-squared as summary statistics.
stargazer(
  list(reg1, reg2, reg3, reg4, reg5, reg6),
  keep.stat = c("n", "adj.rsq"),
  type = "text"
)
## 
## ==================================================================================
##                                          Dependent variable:                      
##                    ---------------------------------------------------------------
##                                                 weekq                             
##                       (1)        (2)        (3)        (4)        (5)       (6)   
## ----------------------------------------------------------------------------------
## price              -6.993***  -5.782***  -5.337***  -5.617***   -0.882    -0.997  
##                     (1.431)    (1.420)    (1.416)    (1.420)    (1.259)   (1.256) 
##                                                                                   
## pricesq             0.086***   0.078***   0.064**    0.069**     0.003     0.003  
##                     (0.029)    (0.029)    (0.029)    (0.029)    (0.026)   (0.025) 
##                                                                                   
## age_rel             0.043***   0.039***   0.043***   0.039***  -0.150*** -0.151***
##                     (0.010)    (0.010)    (0.010)    (0.010)    (0.012)   (0.012) 
##                                                                                   
## new                -93.345*** -93.256*** -46.658*** -48.849***  -8.629    -18.499 
##                     (10.205)   (10.059)   (14.593)   (14.599)  (12.891)  (13.242) 
##                                                                                   
## conflict                      61.612***  79.853***  79.690***  63.115*** 38.225***
##                                (8.357)    (9.296)    (9.285)    (8.157)  (11.387) 
##                                                                                   
## dummy_season1                                         11.396     8.111     8.097  
##                                                      (10.052)   (8.799)   (8.778) 
##                                                                                   
## dummy_season2                                        26.060**  19.493**  19.708** 
##                                                      (10.202)   (8.933)   (8.912) 
##                                                                                   
## dummy_season0                                                                     
##                                                                                   
##                                                                                   
## pastinter                                                      0.503***  0.448*** 
##                                                                 (0.021)   (0.028) 
##                                                                                   
## new:conflict                             -87.600*** -86.316*** -29.450*   -9.996  
##                                           (19.963)   (19.944)  (17.621)  (18.649) 
##                                                                                   
## conflict:pastinter                                                       0.099*** 
##                                                                           (0.032) 
##                                                                                   
## Constant           160.366*** 117.950*** 103.007*** 95.049***  61.429*** 76.161***
##                     (12.530)   (13.624)   (13.978)   (14.689)  (12.934)  (13.738) 
##                                                                                   
## ----------------------------------------------------------------------------------
## Observations         1,826      1,826      1,826      1,826      1,826     1,826  
## Adjusted R2          0.083      0.109      0.118      0.120      0.326     0.329  
## ==================================================================================
## Note:                                                  *p<0.1; **p<0.05; ***p<0.01