Probability

Below is the analysis done on House Prices. The dataset used for the analysis may be access on the Kaggle.com webpage tagged: “House Prices: Advanced Regression Techniques.”

The dataset comprises of both quantitative and qualitative data types, It has 1460 observations and 81 Variables. The analysis was done on the Lot Area and Sale Price variables of the dataset. I chose to work with these variable, because I wanted to know if actually there is a “reasonable relationship between Sale Price and the Lot Area as its usually seen it home sale adverts.”

I made used of statistical tools like Descriptive and inferential statistics, Probability, Linear algebra and correletion, Modelling and calculus-based Probability.

# Kindly install the underlisted packages and before continue for error- free analysis. 


options(warn = -1)
library(stringr)
library(tidyr)
library(knitr)
#require(e1071)
library(e1071)
suppressMessages(require(plotly))
suppressMessages(require(Amelia))
suppressMessages(require(ggplot2))
suppressMessages(require(MCMCpack))
suppressMessages(require(manipulate))
suppressMessages(require(stargazer))
suppressMessages(require(xtable))

train <- read.csv("C:\\Users\\mascot\\Documents\\GitHub\\MSDA 605\\train.csv", header = TRUE,sep = ",") # Read the dataset from local file
kable(head(train)) # Glimpses of the datatset

Id	MSSubClass	MSZoning	LotFrontage	LotArea	Street	Alley	LotShape	LandContour	Utilities	LotConfig	LandSlope	Neighborhood	Condition1	Condition2	BldgType	HouseStyle	OverallQual	OverallCond	YearBuilt	YearRemodAdd	RoofStyle	RoofMatl	Exterior1st	Exterior2nd	MasVnrType	MasVnrArea	ExterQual	ExterCond	Foundation	BsmtQual	BsmtCond	BsmtExposure	BsmtFinType1	BsmtFinSF1	BsmtFinType2	BsmtUnfSF	TotalBsmtSF	Heating	HeatingQC	CentralAir	Electrical	X1stFlrSF	X2ndFlrSF	GrLivArea	BsmtFullBath	BsmtHalfBath	FullBath	HalfBath	BedroomAbvGr	KitchenAbvGr	KitchenQual	TotRmsAbvGrd	Functional	Fireplaces	FireplaceQu	GarageType	GarageYrBlt	GarageFinish	GarageCars	GarageArea	GarageQual	GarageCond	PavedDrive	WoodDeckSF	OpenPorchSF	EnclosedPorch	X3SsnPorch	PoolQC	Fence	MiscFeature	MiscVal	MoSold	YrSold	SaleType	SaleCondition	SalePrice
1	60	RL	65	8450	Pave	NA	Reg	Lvl	AllPub	Inside	Gtl	CollgCr	Norm	Norm	1Fam	2Story	7	5	2003	2003	Gable	CompShg	VinylSd	VinylSd	BrkFace	196	Gd	TA	PConc	Gd	TA	No	GLQ	706	Unf	150	856	GasA	Ex	Y	SBrkr	856	854	1710	1	0	2	1	3	1	Gd	8	Typ	0	NA	Attchd	2003	RFn	2	548	TA	TA	Y	0	61	0	0	NA	NA	NA	0	2	2008	WD	Normal	208500
2	20	RL	80	9600	Pave	NA	Reg	Lvl	AllPub	FR2	Gtl	Veenker	Feedr	Norm	1Fam	1Story	6	8	1976	1976	Gable	CompShg	MetalSd	MetalSd	None	0	TA	TA	CBlock	Gd	TA	Gd	ALQ	978	Unf	284	1262	GasA	Ex	Y	SBrkr	1262	0	1262	0	1	2	0	3	1	TA	6	Typ	1	TA	Attchd	1976	RFn	2	460	TA	TA	Y	298	0	0	0	NA	NA	NA	0	5	2007	WD	Normal	181500
3	60	RL	68	11250	Pave	NA	IR1	Lvl	AllPub	Inside	Gtl	CollgCr	Norm	Norm	1Fam	2Story	7	5	2001	2002	Gable	CompShg	VinylSd	VinylSd	BrkFace	162	Gd	TA	PConc	Gd	TA	Mn	GLQ	486	Unf	434	920	GasA	Ex	Y	SBrkr	920	866	1786	1	0	2	1	3	1	Gd	6	Typ	1	TA	Attchd	2001	RFn	2	608	TA	TA	Y	0	42	0	0	NA	NA	NA	0	9	2008	WD	Normal	223500
4	70	RL	60	9550	Pave	NA	IR1	Lvl	AllPub	Corner	Gtl	Crawfor	Norm	Norm	1Fam	2Story	7	5	1915	1970	Gable	CompShg	Wd Sdng	Wd Shng	None	0	TA	TA	BrkTil	TA	Gd	No	ALQ	216	Unf	540	756	GasA	Gd	Y	SBrkr	961	756	1717	1	0	1	0	3	1	Gd	7	Typ	1	Gd	Detchd	1998	Unf	3	642	TA	TA	Y	0	35	272	0	NA	NA	NA	0	2	2006	WD	Abnorml	140000
5	60	RL	84	14260	Pave	NA	IR1	Lvl	AllPub	FR2	Gtl	NoRidge	Norm	Norm	1Fam	2Story	8	5	2000	2000	Gable	CompShg	VinylSd	VinylSd	BrkFace	350	Gd	TA	PConc	Gd	TA	Av	GLQ	655	Unf	490	1145	GasA	Ex	Y	SBrkr	1145	1053	2198	1	0	2	1	4	1	Gd	9	Typ	1	TA	Attchd	2000	RFn	3	836	TA	TA	Y	192	84	0	0	NA	NA	NA	0	12	2008	WD	Normal	250000
6	50	RL	85	14115	Pave	NA	IR1	Lvl	AllPub	Inside	Gtl	Mitchel	Norm	Norm	1Fam	1.5Fin	5	5	1993	1995	Gable	CompShg	VinylSd	VinylSd	None	0	TA	TA	Wood	Gd	TA	No	GLQ	732	Unf	64	796	GasA	Ex	Y	SBrkr	796	566	1362	1	0	1	1	1	1	TA	5	Typ	0	NA	Attchd	1993	Unf	2	480	TA	TA	Y	40	30	0	320	NA	MnPrv	Shed	700	10	2009	WD	Normal	143000

dim(train) # dimension

## [1] 1460   81

test <- read.csv("C:\\Users\\mascot\\Documents\\GitHub\\MSDA 605\\test.csv", header = TRUE,sep = ",")

Below is the subset of the needed variables for analysis and there respectful inferencial statistics

X <- data.frame(train$SalePrice)
Y <- data.frame(train$LotArea) 
# Combinig the two dataset
X_Y <- cbind.data.frame(train$LotArea, train$SalePrice)
colnames(X_Y) <- c("Lot_Area", "Sale_Price")
kable(head(X_Y))

Lot_Area	Sale_Price
8450	208500
9600	181500
11250	223500
9550	140000
14260	250000
14115	143000

kable(summary(X_Y)); # Quartiles and Means

	Lot_Area	Sale_Price
	Min. : 1300	Min. : 34900
	1st Qu.: 7554	1st Qu.:129975
	Median : 9478	Median :163000
	Mean : 10517	Mean :180921
	3rd Qu.: 11602	3rd Qu.:214000
	Max. :215245	Max. :755000

kable(var(X_Y)); #Variances as diagonal

	Lot_Area	Sale_Price
Lot_Area	99625650	209211070
Sale_Price	209211070	6311111264

Probabilities

a1 <- 1 - pnorm(214000, mean=180921, sd=sqrt(6311111264)) # P(X>x) for 3rd quatile (X variable)
a2 <- pnorm(214000, mean=180921, sd=sqrt(6311111264)) # P(X<=x) for 3rd quatile (X variable)

a1;

## [1] 0.3385626

a2;

## [1] 0.6614374

#let P(Y|X) = 0.5

Probabilties cont’d

b1 <- 1 - pnorm(9478, mean=10517, sd=sqrt(99625650)) # P(Y>y) for 3rd quatile (Y variable)
b2 <- pnorm(9478, mean=10517, sd=sqrt(99625650)) # P(Y<=y) for 3rd quatile (Y variable)

b1;

## [1] 0.541453

b2;

## [1] 0.458547

Recall:

\(Pr(A|B)\quad =\frac { Pr(B|A).Pr(A) }{ Pr(B) } \quad =\quad \frac { Pr(B|A).Pr(A) }{ Pr(B|A).Pr(A)\quad +\quad Pr(B|A\prime ).Pr(A\prime ) }\)

# for P(X>x | Y>y)
 
a <- (0.5 * a1)/((0.5 * a1)+(0.5*a2))
a

## [1] 0.3385626

# for P(X>x, Y>y) = P(x).P(y)

b <- a1 * b1
b

## [1] 0.1833158

# for P(X<x|Y>y)

c <- (0.5 * a2)/((0.5 * a2)+(0.5*a2))
c

## [1] 0.5

Contigency Table analysis

col1_row1 <- nrow(subset(X_Y, Sale_Price <= 214000 & Lot_Area <= 9478))
col1_row2 <- nrow(subset(X_Y, Sale_Price <= 214000 & Lot_Area > 9478))
col2_row1 <- nrow(subset(X_Y, Sale_Price > 214000 & Lot_Area <= 9478))
col2_row2 <- nrow(subset(X_Y, Sale_Price > 214000 & Lot_Area > 9478))

Row-Column Analysis.

col1_row1;

## [1] 660

col1_row2;

## [1] 438

col2_row1;

## [1] 70

col2_row2

## [1] 292

#Checking

checking <- col1_row1 + col1_row2 + col2_row1 + col2_row2;
print.default("Below is the for checking for the number of observation with respect to the analysis:")

## [1] "Below is the for checking for the number of observation with respect to the analysis:"

print(checking)

## [1] 1460

Contigency Table

suppressMessages(require(taRifx))
library(xtable)

e <- cbind.data.frame(c (col1_row1,col1_row2, (col1_row1 +col1_row2)))
f <- cbind.data.frame(c (col2_row1,col2_row2, (col2_row1 +col2_row2)))
g <- e+f

con_table <- cbind.data.frame(c(e,f,g))

con_table;

##   c(col1_row1, col1_row2, (col1_row1 + col1_row2))
## 1                                              660
## 2                                              438
## 3                                             1098
##   c(col2_row1, col2_row2, (col2_row1 + col2_row2))
## 1                                               70
## 2                                              292
## 3                                              362
##   c(col1_row1, col1_row2, (col1_row1 + col1_row2))
## 1                                              730
## 2                                              730
## 3                                             1460

row.names(con_table) = c("<=3d quartile", ">3d quartile", "Total")
colnames(con_table) = c("<=2d quartile", ">2d quartile", "Total")
kable(con_table);

	<=2d quartile	>2d quartile	Total
<=3d quartile	660	70	730
>3d quartile	438	292	730
Total	1098	362	1460

Descriptive and Inferential Statistics

Chi-sqaure of the dataset

attach(X_Y)
chisq.test(X_Y)

## 
##  Pearson's Chi-squared test
## 
## data:  X_Y
## X-squared = 7125100, df = 1459, p-value < 2.2e-16

Checking for any missing value(s).

missmap(train, main = "Missing values vs observed")

missmap(X_Y, main = "Missing values vs observed") # Note that LotArea & SalePrice has no missing value.

It was found that ther is no missing value(s) in the Sale Price and Lot Area variable.

Below is the boxplot, skewness value, quartile, QQPlot and histogram of Sale Price showing it right-Skewed.

boxplot.default(X_Y)

quantile(X_Y$Lot_Area)

##       0%      25%      50%      75%     100% 
##   1300.0   7553.5   9478.5  11601.5 215245.0

quantile((X_Y$Sale_Price))

##     0%    25%    50%    75%   100% 
##  34900 129975 163000 214000 755000

skewness(X_Y$Sale_Price)

## [1] 1.879009

(mfrow=c(1,2))

## [1] 1 2

qqnorm(X_Y$Sale_Price)
qqline(X_Y$Sale_Price)

hist(X_Y$Sale_Price, main = "SALE PRICES", xlab = "Sale Price");

More Plots confirmation. ( The plot is interactive, play with it!)

plot_ly(x =X_Y$Sale_Price, type="histogram", xaxis = "SALE PRICES")

GGplot depeicting the skewness to the right.

ggplot(data = X_Y) + geom_density(aes(x=X_Y$Sale_Price), fill="brown") + ggtitle("Lot Areas");

Checking for the length of LotsArea.

length(X_Y$Sale_Price)==1460

## [1] TRUE

dim(X_Y)

## [1] 1460    2

Linear algebra and Correlation

Generalised Linear Model, Confident Interval and Correlation co-efficient

x_y2 <- glm(Sale_Price~Lot_Area, data=X_Y)
summary(x_y2)

## 
## Call:
## glm(formula = Sale_Price ~ Lot_Area, data = X_Y)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -275668   -48169   -17725    31248   553356  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.588e+05  2.915e+03   54.49   <2e-16 ***
## Lot_Area    2.100e+00  2.011e-01   10.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 5875801165)
## 
##     Null deviance: 9.2079e+12  on 1459  degrees of freedom
## Residual deviance: 8.5669e+12  on 1458  degrees of freedom
## AIC: 36989
## 
## Number of Fisher Scoring iterations: 2

t.test(X_Y$Sale_Price, X_Y$Lot_Area, conf.level=0.95);

## 
##  Welch Two Sample t-test
## 
## data:  X_Y$Sale_Price and X_Y$Lot_Area
## t = 81.321, df = 1505.1, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  166294.1 174514.7
## sample estimates:
## mean of x mean of y 
## 180921.20  10516.83

confint(x_y2);

## Waiting for profiling to be done...

##                    2.5 %       97.5 %
## (Intercept) 1.531234e+05 1.645489e+05
## Lot_Area    1.705906e+00 2.494037e+00

cor(X_Y$Sale_Price, X_Y$Lot_Area)

## [1] 0.2638434

From the above, we saw that the analysis is significance at less P-value, but with weak Co-efficient.

#color = c("red", "green")
plt <- plot(X_Y, xlab="Sale Price", ylab="Lot Area", main="SALE PRICES Against LOT AREAS", pch=1, cex.main=1.5, frame.plot=TRUE);

abline(lm(X_Y$Sale_Price~X_Y$Lot_Area), col="red")

An interactive Plot.

plot_ly(x =X_Y$Sale_Price, y =X_Y$Lot_Area , mode = "markers+lines",
        color = "brown", type= "scatter",line = list(shape = "linear"))

SETTING UP HYPOTHESIS:

Null Hypothesis: The population parameters(means) are the same

\({ H }_{ 0 }\quad :\quad { cor }_{ x_y }={ 0 }\)

Alternative Hypothesis: Not \({ H }_{ 0 }\quad\)

\({ H }_{ 1 }\quad :\quad { cor }_{ x_y }\not= 0\)

Rejection:

Rejection \({ H }_{ 0 }\) if the calculated value (Correlation) is not equal to Zero(0) i.e , otherwise do not reject \({ H }_{ 0 }\quad\).

matrix(X_Y)

##      [,1]        
## [1,] Integer,1460
## [2,] Integer,1460

cor.test(X_Y$Sale_Price, X_Y$Lot_Area, conf.level=0.99);

## 
##  Pearson's product-moment correlation
## 
## data:  X_Y$Sale_Price and X_Y$Lot_Area
## t = 10.445, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 99 percent confidence interval:
##  0.2000196 0.3254375
## sample estimates:
##       cor 
## 0.2638434

Decision and conclusion

Since their correlation (0.2638) is not eqaul to zero, we therefore reject \({ H }_{ 0 }\), and conclude that they have non-zero correlation.

Interpretation

Since the 99 percent confidence interval is also non-zero, we can confirm that our analysis is correct, and it means that we are 99 percent confident that the true correlation value would fall in between 0.2000 and 0.3254.

Calculus-based Probability & Statistics

suppressWarnings(library(rags2ridges))


matx <- as.matrix.data.frame(X_Y)

cormax <- covML(matx, cor = TRUE) #Covariance Matrix


prec_max <- ridgeP(cormax, lambda = 10, type = "Alt") #precision matrix

cormax;

##             Lot_Area Sale_Price
## Lot_Area   1.0000000  0.2638434
## Sale_Price 0.2638434  1.0000000

prec_max;

##               Lot_Area  Sale_Price
## Lot_Area    1.06846261 -0.02425832
## Sale_Price -0.02425832  1.06846261

cor_prec <- cormax*prec_max; #Corrrelation & Precision Matrix

prec_cor <- prec_max*cormax; 


cor_prec;

##                Lot_Area   Sale_Price
## Lot_Area    1.068462605 -0.006400397
## Sale_Price -0.006400397  1.068462605

prec_cor;

##                Lot_Area   Sale_Price
## Lot_Area    1.068462605 -0.006400397
## Sale_Price -0.006400397  1.068462605

Principal Component Analysis.

pca <- prcomp(X_Y, scale. = T)
names(pca)

## [1] "sdev"     "rotation" "center"   "scale"    "x"

biplot(pca, scale=0);

pca_std <- pca$sdev; 
pca_var <- pca_std^2;
pca_varexp <- pca_var/sum(pca_var);


plot(pca_varexp, xlab = "Principal Component Analysis", ylab = "Proportion of Variance Explained", type="b")

We saw from the above that the Sale Price is skewed to the right, so I decided to apply logarithm.

#require(AUC)

#require(RCurl)
#require(pROC)
suppressMessages(require(ROCR))

X_Ylog <- cbind.data.frame(X_Y$Lot_Area, X_Y$Sale_Price, log(X_Y$Lot_Area), log(X_Y$Sale_Price) )
colnames(X_Ylog) = c("Lot_Area", "Sale_Price", "Lot_Area_Log", "Sale_Price_log")

kable(head(X_Ylog))

Lot_Area	Sale_Price	Lot_Area_Log	Sale_Price_log
8450	208500	9.041922	12.24769
9600	181500	9.169518	12.10901
11250	223500	9.328123	12.31717
9550	140000	9.164296	11.84940
14260	250000	9.565214	12.42922
14115	143000	9.554993	11.87060

kable(summary(X_Ylog))

Lot_Area	Sale_Price	Lot_Area_Log	Sale_Price_log
Min. : 1300	Min. : 34900	Min. : 7.170	Min. :10.46
1st Qu.: 7554	1st Qu.:129975	1st Qu.: 8.930	1st Qu.:11.78
Median : 9478	Median :163000	Median : 9.157	Median :12.00
Mean : 10517	Mean :180921	Mean : 9.111	Mean :12.02
3rd Qu.: 11602	3rd Qu.:214000	3rd Qu.: 9.359	3rd Qu.:12.27
Max. :215245	Max. :755000	Max. :12.280	Max. :13.53

We also tried to fit it by shifting it to the left.

max_x <- 755000
X_shift <- X_Y$Sale_Price + max_x
summary(X_shift)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  789900  885000  918000  935900  969000 1510000

fitdistr(X_shift, densfun = "exponential") # fitting exponentially

##        rate    
##   1.068466e-06 
##  (2.796303e-08)

lamda <- rexp(1000, X_shift) # A sample of 1000

Side-by-side plots of the fitted data.

attach(X_Ylog)

## The following objects are masked from X_Y:
## 
##     Lot_Area, Sale_Price

plot_ly(x =X_Y$Sale_Price, type="histogram", xaxis = "SALE PRICES")

plot_ly(x =lamda, type="histogram", xaxis = "OPTIMAL VALUE OF SALE PRICES")

hist(x =X_Ylog, color="blue", main = "SALE PRICES LOG")

plot_ly(x =X_Ylog$Sale_Price_log, type="histogram")

Bayes depicting density and trace of the Lot Area to the Sale Price.

bayes <- MCMCregress(Sale_Price ~ Lot_Area, data=X_Y)
summary(bayes);

## 
## Iterations = 1001:11000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                  Mean        SD  Naive SE Time-series SE
## (Intercept) 1.588e+05 2.917e+03 2.917e+01      2.917e+01
## Lot_Area    2.099e+00 2.036e-01 2.036e-03      2.036e-03
## sigma2      5.884e+09 2.182e+08 2.182e+06      2.182e+06
## 
## 2. Quantiles for each variable:
## 
##                  2.5%       25%       50%       75%     97.5%
## (Intercept) 1.531e+05 1.569e+05 1.589e+05 1.608e+05 1.646e+05
## Lot_Area    1.698e+00 1.961e+00 2.096e+00 2.235e+00 2.502e+00
## sigma2      5.470e+09 5.734e+09 5.880e+09 6.030e+09 6.321e+09

plot(bayes)

library(xtable)
options(xtable.floating = FALSE)
options(xtable.timestamp = "")

Modelling

SETTING UP HYPOTHESIS:

Null Hypothesis: There is no relationship between Sale Prices and Lot Area

\({ H }_{ 0 }\quad :\quad { \mu }_{ 1 } =\quad { \mu }_{ 2 }\quad ={ \quad \mu }_{ 3 }............={ \mu }_{ n }\)

Against

Alternative Hypothesis:

Not \({ H }_{ 0 }\quad\)

Rejection:

Rejection \({ H }_{ 0 }\) if the calculated value (P-Value) is less the tabulated value(Table Value = 0.05) i.e P \(\le \quad \alpha\), at 0.05, otherwise do not reject \({ H }_{ 0 }\quad\).

x_y2 <- glm(Sale_Price~Lot_Area, data=X_Y)
summary(x_y2)

## 
## Call:
## glm(formula = Sale_Price ~ Lot_Area, data = X_Y)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -275668   -48169   -17725    31248   553356  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.588e+05  2.915e+03   54.49   <2e-16 ***
## Lot_Area    2.100e+00  2.011e-01   10.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 5875801165)
## 
##     Null deviance: 9.2079e+12  on 1459  degrees of freedom
## Residual deviance: 8.5669e+12  on 1458  degrees of freedom
## AIC: 36989
## 
## Number of Fisher Scoring iterations: 2

anova(x_y2, test="Chisq")

## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: Sale_Price
## 
## Terms added sequentially (first to last)
## 
## 
##          Df   Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                        1459 9.2079e+12              
## Lot_Area  1 6.4099e+11      1458 8.5669e+12 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(x_y2)

require(graphics)
dim(train)

## [1] 1460   81

dim(test)

## [1] 1459   80

X_YTRIM <- X_Y[sample(nrow(X_Ylog), 1459), ]
pred.w.plim <- predict(lm(X_YTRIM$Sale_Price ~ X_YTRIM$Lot_Area), test,interval = "prediction")
pred.w.clim <- predict(lm(X_YTRIM$Sale_Price ~ X_YTRIM$Lot_Area), test, interval = "confidence")

matplot(test$LotArea, cbind(pred.w.clim, pred.w.plim[,-1]),
        lty = c(1,2,2,3,3), type = "l", ylab = "Predicted Lot Area")

DECISION AND CONCLUSION:

From the above analysis, we found that the calculated table value, P-value(\(<2e^-16\)) is less than the tabulated value (at \(\alpha\) <= 0.05), we therefore fail to accept \(\ H_0\) (Null Hypothesis) as there is no enough evidence that negate the relationship between Sale Price and Lot Area, but to conclude that there is relationship between Sale Price and Lot Area even with weak positive correlation co-efficient.

THANK YOU FOR YOUR TIME

write.table(pred.w.plim, file = "Sale_Price.csv",row.names=FALSE, na="",col.names=TRUE, sep=",")

|IS 605 FUNDAMENTALS OF COMPUTATIONAL MATHEMATICS - PROJECT | Data Analytics

Author:MUSA T GANIYU

Probability

Descriptive and Inferential Statistics

Linear algebra and Correlation

Calculus-based Probability & Statistics

Modelling