# Session setup.
# A bare `rm()` call used to sit here; called without arguments it removes
# nothing from the workspace, so the dead call has been dropped.
# NOTE(review): this working directory is an absolute, machine-specific path;
# the script will stop here on any machine where the folder does not exist.
setwd("C:/Users/AKASH/Desktop/Term IV/Applied Econometrics for managers/Assignment 1")

library(foreign)
library(psych) 
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
# Read the Stata .dta file into a data frame.
mydata1 <- read.dta("C:/Users/AKASH/Desktop/Term IV/Applied Econometrics for managers/Assignment 1/macchiavello_morjaria_aer_2015.dta")

# Keep only the three variables used in the analysis below.
myvars <- c("weekq", "price", "age")
mydata <- mydata1[myvars]

# Drop rows whose price is missing. Missingness has to be tested with
# is.na(): the earlier `price != "NA"` compared against the *string* "NA" and
# only worked because that comparison evaluates to NA for missing prices,
# which subset() happens to discard.
mydata <- subset(mydata, !is.na(price))

# Rename the "age" column to "age_rel"; every other column name is left as is.
names(mydata) <- replace(names(mydata), names(mydata) == "age", "age_rel")

# Summary statistics for every column of mydata (weekq, price, age_rel).
# The printed table includes n, mean, sd, median, min and max.
describe(mydata)
##         vars    n   mean     sd median trimmed    mad min     max   range skew
## weekq      1 1826 106.23 183.42  35.75   61.48  44.60 0.2 1694.39 1694.19 3.43
## price      2 1826  10.95   8.22   8.90    9.40   3.86 1.0   60.00   59.00 2.87
## age_rel    3 1826 603.76 415.19 550.00  580.71 532.99 1.0 1430.00 1429.00 0.37
##         kurtosis   se
## weekq      15.48 4.29
## price      10.61 0.19
## age_rel    -1.04 9.72
# Add natural-log transforms of quantity and price as new columns of mydata.
mydata[["log_weekq"]] <- log(mydata[["weekq"]])
mydata[["log_price"]] <- log(mydata[["price"]])

# OLS estimation of the linear regressions.
# Model (i), level-level: weekq = β0 + β1 * price + e
reg1 <- lm(formula = mydata$weekq ~ mydata$price)
summary(reg1)
## 
## Call:
## lm(formula = mydata$weekq ~ mydata$price)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -139.65  -91.43  -64.02    1.61 1584.92 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  143.2939     7.0689  20.271  < 2e-16 ***
## mydata$price  -3.3845     0.5163  -6.555 7.21e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 181.3 on 1824 degrees of freedom
## Multiple R-squared:  0.02302,    Adjusted R-squared:  0.02248 
## F-statistic: 42.97 on 1 and 1824 DF,  p-value: 7.208e-11
# Fitted (predicted) values of the dependent variable from reg1.
weekq_predicted <- fitted.values(reg1)

# R^2 by hand: Var(fitted values) / Var(dependent variable).
var(weekq_predicted) / var(mydata[["weekq"]])
## [1] 0.0230168
# Model (ii), log-log: log(weekq) = β0 + β1 * log(price) + e
reg2 <- lm(formula = mydata$log_weekq ~ mydata$log_price)
summary(reg2)
## 
## Call:
## lm(formula = mydata$log_weekq ~ mydata$log_price)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.0192 -0.8953 -0.0252  0.9469  4.6222 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       5.67211    0.13352   42.48   <2e-16 ***
## mydata$log_price -0.96625    0.05849  -16.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.514 on 1824 degrees of freedom
## Multiple R-squared:  0.1302, Adjusted R-squared:  0.1297 
## F-statistic: 272.9 on 1 and 1824 DF,  p-value: < 2.2e-16
# Fitted values from reg2. The dependent variable of reg2 is log_weekq, so
# these predictions are on the log(weekq) scale (the variable name is kept
# unchanged for compatibility with any later use).
weekq_predicted02 <- fitted(reg2)

# R^2 for reg2 by hand: Var(fitted values) / Var(dependent variable).
# The denominator must be the variance of log_weekq, the variable reg2
# actually explains. Dividing by var(weekq) instead mixes log and level
# scales and gave a meaningless ratio of about 1e-05. With the matching
# denominator the ratio reproduces the Multiple R-squared reported by
# summary(reg2), i.e. 0.1302.
var(weekq_predicted02) / var(mydata$log_weekq)
# Model (iii), log-level: log(weekq) = β0 + β1 * price + e
reg3 <- lm(formula = mydata$log_weekq ~ mydata$price)
summary(reg3)
## 
## Call:
## lm(formula = mydata$log_weekq ~ mydata$price)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.6401 -0.9107 -0.0082  0.9376  5.0740 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.368224   0.058513   74.65   <2e-16 ***
## mydata$price -0.075150   0.004274  -17.58   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.501 on 1824 degrees of freedom
## Multiple R-squared:  0.145,  Adjusted R-squared:  0.1445 
## F-statistic: 309.2 on 1 and 1824 DF,  p-value: < 2.2e-16
# Fitted values from reg3. The dependent variable of reg3 is log_weekq, so
# these predictions are on the log(weekq) scale (the variable name is kept
# unchanged for compatibility with any later use).
weekq_predicted03 <- fitted(reg3)

# R^2 for reg3 by hand: Var(fitted values) / Var(dependent variable).
# The denominator must be the variance of log_weekq, the variable reg3
# actually explains. Dividing by var(weekq) instead mixes log and level
# scales and gave a meaningless ratio of about 1e-05. With the matching
# denominator the ratio reproduces the Multiple R-squared reported by
# summary(reg3), i.e. 0.145.
var(weekq_predicted03) / var(mydata$log_weekq)
# Model (iv), level-log: weekq = β0 + β1 * log(price) + e
reg4 <- lm(formula = mydata$weekq ~ mydata$log_price)
summary(reg4)
## 
## Call:
## lm(formula = mydata$weekq ~ mydata$log_price)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -198.25  -87.04  -62.17   -6.99 1592.39 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       198.514     16.024  12.389  < 2e-16 ***
## mydata$log_price  -41.926      7.019  -5.973 2.79e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 181.7 on 1824 degrees of freedom
## Multiple R-squared:  0.01918,    Adjusted R-squared:  0.01865 
## F-statistic: 35.68 on 1 and 1824 DF,  p-value: 2.792e-09
# Fitted (predicted) values of the dependent variable from reg4.
weekq_predicted04 <- fitted.values(reg4)

# R^2 for reg4 by hand: Var(fitted values) / Var(dependent variable).
var(weekq_predicted04) / var(mydata[["weekq"]])
## [1] 0.01918491
# Compare the slope estimate (β1) across the four bivariate models.

# Pull the named coefficient vector (intercept, slope) out of each fit.
beta_estimated01 <- coefficients(reg1)
beta_estimated02 <- coefficients(reg2)
beta_estimated03 <- coefficients(reg3)
beta_estimated04 <- coefficients(reg4)

# Print each coefficient vector in turn.
beta_estimated01
##  (Intercept) mydata$price 
##   143.293891    -3.384465
beta_estimated02
##      (Intercept) mydata$log_price 
##        5.6721095       -0.9662545
beta_estimated03
##  (Intercept) mydata$price 
##   4.36822449  -0.07514987
beta_estimated04
##      (Intercept) mydata$log_price 
##        198.51442        -41.92559
# Create the squared-price regressor: price_sq = price^2.
mydata$price_sq <- mydata$price^2

# Model (v): weekq = β0 + β1 * price + β2 * price_sq + β3 * age_rel + e
# The formula refers to columns by bare name and lets `data = mydata` resolve
# them. Previously every term was written as mydata$<col> while `data` was
# also supplied, which made the `data` argument redundant and put the
# "mydata$" prefix into the coefficient labels. The estimates are unchanged;
# only the call and the labels in the printed summary differ.
reg5 <- lm(weekq ~ price + price_sq + age_rel, data = mydata)
summary(reg5)
## 
## Call:
## lm(formula = weekq ~ price + price_sq + age_rel, data = mydata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -172.16  -86.07  -55.22    2.63 1604.22 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 137.81910   12.56029  10.973  < 2e-16 ***
## price        -7.59477    1.46140  -5.197 2.25e-07 ***
## price_sq      0.08831    0.02998   2.946  0.00326 ** 
## age_rel       0.05801    0.01021   5.683 1.54e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 179.6 on 1822 degrees of freedom
## Multiple R-squared:  0.04285,    Adjusted R-squared:  0.04127 
## F-statistic: 27.19 on 3 and 1822 DF,  p-value: < 2.2e-16
# Fitted (predicted) values of the dependent variable from reg5.
weekq_predicted05 <- fitted.values(reg5)

# R^2 for reg5 by hand: Var(fitted values) / Var(dependent variable).
var(weekq_predicted05) / var(mydata[["weekq"]])
## [1] 0.04284859
# Side-by-side text table of all five regressions, reporting only the number
# of observations and the adjusted R^2 as summary statistics.
stargazer(
  list(reg1, reg2, reg3, reg4, reg5),
  type = "text",
  keep.stat = c("n", "adj.rsq")
)
## 
## =================================================================
##                              Dependent variable:                 
##              ----------------------------------------------------
##                weekq         log_weekq              weekq        
##                 (1)        (2)       (3)       (4)        (5)    
## -----------------------------------------------------------------
## price        -3.384***            -0.075***            -7.595*** 
##               (0.516)              (0.004)              (1.461)  
##                                                                  
## log_price               -0.966***           -41.926***           
##                          (0.058)             (7.019)             
##                                                                  
## price_sq                                                0.088*** 
##                                                         (0.030)  
##                                                                  
## age_rel                                                 0.058*** 
##                                                         (0.010)  
##                                                                  
## Constant     143.294*** 5.672***  4.368***  198.514*** 137.819***
##               (7.069)    (0.134)   (0.059)   (16.024)   (12.560) 
##                                                                  
## -----------------------------------------------------------------
## Observations   1,826      1,826     1,826     1,826      1,826   
## Adjusted R2    0.022      0.130     0.144     0.019      0.041   
## =================================================================
## Note:                                 *p<0.1; **p<0.05; ***p<0.01