work_dir = "D:/Harvardx/CSCI E-116 Dynamic Modeling Time Series/"
setwd(work_dir)
library(readxl)
## Warning: package 'readxl' was built under R version 4.2.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Homeprice = read_xlsx("W02b_homeprice.xlsx")
colnames(Homeprice)
## [1] "MSA (Metropolitan Statistical Area)" "homepriceg"
## [3] "bpo_pop03" "pincomeg"
## [5] "chci" "jante"
## [7] "ocean" "unaval"
## [9] "pop03"
Homeprice <- select(Homeprice, -1)
colnames(Homeprice) <- c("Home_Price", "Home Supply", "Economic Growth", "Human Capital", "Climate", "Ocean", "Geography", "City Size")
str(Homeprice)
## tibble [245 × 8] (S3: tbl_df/tbl/data.frame)
## $ Home_Price : num [1:245] 66.3 28.6 45.4 86.6 49.4 90.2 61.6 88.5 64 41 ...
## $ Home Supply : num [1:245] 3.4 5.8 6.8 4.6 11.7 5.9 6.3 3.4 6.4 15.1 ...
## $ Economic Growth: num [1:245] 108.8 75.9 77.3 98.5 111.6 ...
## $ Human Capital : num [1:245] 126 133 124 136 131 ...
## $ Climate : num [1:245] 44 26 51 25 35 48 28 23.3 36 24.1 ...
## $ Ocean : num [1:245] 0 0 0 0 0 0 0 0 0 0 ...
## $ Geography : num [1:245] 2 6.5 12.7 23.3 11.6 19.2 20.9 36.1 3.5 9.7 ...
## $ City Size : num [1:245] 158810 701139 156914 844619 772128 ...
fit_hp <- lm(Home_Price ~ . , data=Homeprice)
summary(fit_hp, digits=2)
##
## Call:
## lm(formula = Home_Price ~ ., data = Homeprice)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.236 -10.241 0.245 7.949 48.058
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.288e+02 3.174e+01 -4.057 6.75e-05 ***
## `Home Supply` -2.527e+00 2.532e-01 -9.978 < 2e-16 ***
## `Economic Growth` 4.402e-01 4.539e-02 9.699 < 2e-16 ***
## `Human Capital` 1.255e+00 2.338e-01 5.370 1.88e-07 ***
## Climate -2.078e-01 1.174e-01 -1.769 0.078115 .
## Ocean 1.522e+01 4.016e+00 3.789 0.000192 ***
## Geography 3.560e-01 6.570e-02 5.419 1.47e-07 ***
## `City Size` 1.091e-06 6.474e-07 1.685 0.093371 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.06 on 237 degrees of freedom
## Multiple R-squared: 0.5479, Adjusted R-squared: 0.5345
## F-statistic: 41.03 on 7 and 237 DF, p-value: < 2.2e-16
You can also embed plots, for example:
import pandas as pd
from statsmodels.formula.api import ols
df = pd.read_excel("W02b_homeprice.xlsx")
Results = ols("df['homepriceg'] ~ df.iloc[:, 2:]", data=df).fit()
print (Results.summary())
## OLS Regression Results
## ==============================================================================
## Dep. Variable: df['homepriceg'] R-squared: 0.548
## Model: OLS Adj. R-squared: 0.535
## Method: Least Squares F-statistic: 41.03
## Date: Tue, 06 Sep 2022 Prob (F-statistic): 1.51e-37
## Time: 14:55:57 Log-Likelihood: -1038.5
## No. Observations: 245 AIC: 2093.
## Df Residuals: 237 BIC: 2121.
## Df Model: 7
## Covariance Type: nonrobust
## =====================================================================================
## coef std err t P>|t| [0.025 0.975]
## -------------------------------------------------------------------------------------
## Intercept -128.7747 31.740 -4.057 0.000 -191.304 -66.245
## df.iloc[:, 2:][0] -2.5269 0.253 -9.978 0.000 -3.026 -2.028
## df.iloc[:, 2:][1] 0.4402 0.045 9.699 0.000 0.351 0.530
## df.iloc[:, 2:][2] 1.2555 0.234 5.370 0.000 0.795 1.716
## df.iloc[:, 2:][3] -0.2078 0.117 -1.769 0.078 -0.439 0.024
## df.iloc[:, 2:][4] 15.2165 4.016 3.789 0.000 7.305 23.128
## df.iloc[:, 2:][5] 0.3560 0.066 5.419 0.000 0.227 0.485
## df.iloc[:, 2:][6] 1.091e-06 6.47e-07 1.685 0.093 -1.85e-07 2.37e-06
## ==============================================================================
## Omnibus: 2.758 Durbin-Watson: 2.110
## Prob(Omnibus): 0.252 Jarque-Bera (JB): 2.792
## Skew: 0.084 Prob(JB): 0.248
## Kurtosis: 3.495 Cond. No. 5.86e+07
## ==============================================================================
##
## Notes:
## [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
## [2] The condition number is large, 5.86e+07. This might indicate that there are
## strong multicollinearity or other numerical problems.