library(readr)
library(pastecs)
library(psych)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:pastecs':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:psych':
##
## logit
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(car)
library(reshape2)
library(readxl)
podatki <- read.table("~/Library/CloudStorage/OneDrive-Personal/IMB/Bootcamp R/R data exam/WorldHappinessReport2022.csv",
header = TRUE,
sep = ";",
dec = ",")
head(podatki)
## ID Happiness.Rank Country Region Happiness.Score
## 1 0 1 Finland Western Europe 7.821
## 2 1 2 Denmark Western Europe 7.636
## 3 2 3 Iceland Western Europe 7.557
## 4 3 4 Switzerland Western Europe 7.512
## 5 4 5 Netherlands Western Europe 7.415
## 6 5 6 Luxembourg* - 7.404
## Economy..GDP.per.Capita. Family..Social.Support. Health..Life.Expectancy.
## 1 1.892 1.258 0.775
## 2 1.953 1.243 0.777
## 3 1.936 1.320 0.803
## 4 2.026 1.226 0.822
## 5 1.945 1.206 0.787
## 6 2.209 1.155 0.790
## Freedom Trust..Government.Corruption. Generosity Year
## 1 0.736 0.534 0.109 2022
## 2 0.719 0.532 0.188 2022
## 3 0.718 0.191 0.270 2022
## 4 0.677 0.461 0.147 2022
## 5 0.651 0.419 0.271 2022
## 6 0.700 0.388 0.120 2022
colnames(podatki) <- c("ID", "HappinessRank", "Country", "Region", "HappinessScore", "GDP", "FamilySocialSupport" , "LifeExpectancy", "Freedom", "Trust", "Generosity", "Year")
podatki1 <- podatki[, c(1, 3, 4, 2, 5, 6, 7, 8, 9, 10, 11, 12)]
head(podatki1)
## ID Country Region HappinessRank HappinessScore GDP
## 1 0 Finland Western Europe 1 7.821 1.892
## 2 1 Denmark Western Europe 2 7.636 1.953
## 3 2 Iceland Western Europe 3 7.557 1.936
## 4 3 Switzerland Western Europe 4 7.512 2.026
## 5 4 Netherlands Western Europe 5 7.415 1.945
## 6 5 Luxembourg* - 6 7.404 2.209
## FamilySocialSupport LifeExpectancy Freedom Trust Generosity Year
## 1 1.258 0.775 0.736 0.534 0.109 2022
## 2 1.243 0.777 0.719 0.532 0.188 2022
## 3 1.320 0.803 0.718 0.191 0.270 2022
## 4 1.226 0.822 0.677 0.461 0.147 2022
## 5 1.206 0.787 0.651 0.419 0.271 2022
## 6 1.155 0.790 0.700 0.388 0.120 2022
Description:
This is the data from World Happiness Report 2022.
ID - Represents an identification number of input. Happiness.Rank - The ranking number of a country according to Happiness score
Country - Variable states the country for data
Region - Variable states the region of a country for data
HappinessRank - the ranking number or a country based on HappineessScore
HappinessScore - score of subjective well being (1-10)
GDP - Economy..GDP.per.Capita.: The statistics of GDP per capita in purchasing power parity
FamilySocialSupport. - Variable shows the answers to questions measuring having someone to count on in times of trouble. It is the national average of the binary responses (either 0 or 1)
LifeExpectancy. - Healthy life expectancies at birth that are based on the data extracted from the World Health Organizationâs (WHO) Global Health Observatory data repository
Freedom - National average of responses to the GWP question âAre you satisfied or dissatisfied with your freedom to choose what you do with your life?â (0 -1)
Trust - Trust government corruption: national average of the survey responses to two questions in the GWP: âIs corruption widespread throughout the government or notâ and âIs corruption widespread within businesses or not?â The overall perception is the average of the binary 0-or-1 responses.
Generosity - Residual of regressing national average of response to the GWP question âHave you donated money to a charity in the past month?â on GDP per capita.
Year - Year of the report and data results
summary(podatki1 [,c(5, 6, 7, 8, 9, 10, 11)])
## HappinessScore GDP FamilySocialSupport LifeExpectancy
## Min. :2.404 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:4.889 1st Qu.:1.095 1st Qu.:0.7320 1st Qu.:0.4632
## Median :5.569 Median :1.446 Median :0.9575 Median :0.6215
## Mean :5.554 Mean :1.410 Mean :0.9059 Mean :0.5862
## 3rd Qu.:6.305 3rd Qu.:1.785 3rd Qu.:1.1142 3rd Qu.:0.7198
## Max. :7.821 Max. :2.209 Max. :1.3200 Max. :0.9420
## Freedom Trust Generosity
## Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.4405 1st Qu.:0.06825 1st Qu.:0.0890
## Median :0.5435 Median :0.11950 Median :0.1325
## Mean :0.5172 Mean :0.15478 Mean :0.1474
## 3rd Qu.:0.6260 3rd Qu.:0.19850 3rd Qu.:0.1978
## Max. :0.7400 Max. :0.58700 Max. :0.4680
mean(podatki1$HappinessScore)
## [1] 5.553575
mean(podatki1$Freedom)
## [1] 0.517226
The average Happiness score across countries is 5,55 and average response across all countries to questions regarding Freedom is 0,52.
median(podatki1$HappinessScore)
## [1] 5.5685
median(podatki1$Freedom)
## [1] 0.5435
Half of the countries have a HappinessScore lower than 5,57 and half have a HappinessScore higher than 5,57. Half of the countries had a response to the measure of Freedom lower than 0,54 and half higher than 0,54.
hist(podatki1$HappinessScore,
main = "HappinessScore frequency",
xlab = "Happiness Score",
ylab = "Frequency")
We can see that the histogram of HappinessScore frequency is slightly
skewed to the left which tells us that HappinessScore distribution is
higher on the right side of the graph.
scatterplot(y =podatki1$Freedom,
x =podatki1$HappinessScore,
ylab= "Satisfaction of freedom",
xlab= "Happiness Score",
smooth=FALSE)
Happiness score is increasing with the feeling of satisfaction with
freedom to choose what to do in life.
ggplot(podatki1, aes(x=Region, y=HappinessScore)) +
geom_boxplot() +
xlab("Region")
We can see that HappinessScore median is the highest in the region of
New Zeland, Europe and North America. In Australia there is one country
with extremely high Happiness Score. East and Northern Africa is the
region with the highest distribution od Happiness Score rates across
countries.
A researcher examined the effects of online schooling on body weight in ninth graders (Body mass.csv in Task 2 folder). Based on data from the 2018/2019 school year, the average weight was 59.5 kg. At the beginning of the 2021/2022 school year, researchers randomly selected 50 ninth graders and measured their weight. 1 Show the descriptive statistics of body mass and its distribution with the histogram. 2 Test the following hypothesis with program R: ð»0:ð = 59.5. 3 Explain the results and interpret the effect size.
mydatat2 <- read.table("~/Library/CloudStorage/OneDrive-Personal/IMB/Bootcamp R/R data exam/Task 2/Body mass.csv",
header = TRUE,
sep = ";",
dec = ",")
head(mydatat2)
## ID Mass
## 1 1 62.1
## 2 2 64.5
## 3 3 56.5
## 4 4 53.4
## 5 5 61.3
## 6 6 62.2
Description:
ID: Identification number of a ninth grader randomly selected by researchers in the beginning of the 2021/2022 school year to measure the weight of 50 ninth graders.
Mass: Weight of a ninth grader in measured in Kg at the beginning of 2021/2022 school year
round(stat.desc(mydatat2$Mass), 2)
## nbr.val nbr.null nbr.na min max range
## 50.00 0.00 0.00 49.70 83.20 33.50
## sum median mean SE.mean CI.mean.0.95 var
## 3143.80 62.80 62.88 0.85 1.71 36.14
## std.dev coef.var
## 6.01 0.10
describe(mydatat2[ , -1])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 50 62.88 6.01 62.8 62.56 3.34 49.7 83.2 33.5 0.85 2.11 0.85
This data set contains a sample of 50 observations. We can see that in this sample the student with the lowest weight weighs 49,70 Kg and the student who weighs the most, weighs 83,20 Kg. Half (50 %) of the students weigh 62.80 Kg or less and half of students in the sample weigh more than 62.80 Kg. The estimated average weight of a 9th grader at the beginning of 2021/2022 school year is 62.88 kg. Strandard deviation in this sample is 6.01 Kg which tells us that values vary around the mean value for 6.01 Kg up or down.
hist(mydatat2$Mass,
main = "Distribution of weight",
xlab = "Weight",
ylab = "Frequency",
breaks = seq(from = 0, to = 100, by = 5))
Distribution of this sample as seen on the historam is asymetric to
right (positive skewness).
t.test(mydatat2$Mass,
mu = 59.5)
##
## One Sample t-test
##
## data: mydatat2$Mass
## t = 3.9711, df = 49, p-value = 0.000234
## alternative hypothesis: true mean is not equal to 59.5
## 95 percent confidence interval:
## 61.16758 64.58442
## sample estimates:
## mean of x
## 62.876
t <- 3.9711
r <- sqrt((t^2) / (t^2 + 49))
r
## [1] 0.4934295
With one sample t-test we tested the hypothesis and got the p-value, which tells us the probability that the null hypothesis (H0) is TRUE.
In our case the p-value is 0.02 %. We can say that H0, saying that average height is 59.5 Kg, is extremely unlikely and we can reject the H0 at p < 0.001.
We can say with 95% certainty that the true arithmetic mean is between 61.17 and 64.58.
The calculated effect size is 0.49 which means that it is medium (0.3 to 0.5).
mydatat3 <- read_xlsx("~/Library/CloudStorage/OneDrive-Personal/IMB/Bootcamp R/R data exam/Task 3/Apartments.xlsx")
head(mydatat3)
## # A tibble: 6 Ã 5
## Age Distance Price Parking Balcony
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 7 28 1640 0 1
## 2 18 1 2800 1 0
## 3 7 28 1660 0 0
## 4 28 29 1850 0 1
## 5 18 18 1640 1 1
## 6 28 12 1770 0 1
Description:
mydatat3$ParkingFactor <- factor(mydatat3$Parking,
levels = c(0, 1),
labels = c("No", "Yes"))
mydatat3$BalconyFactor <- factor(mydatat3$Balcony,
levels = c(0, 1),
labels = c("No", "Yes"))
head(mydatat3)
## # A tibble: 6 Ã 7
## Age Distance Price Parking Balcony ParkingFactor BalconyFactor
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <fct>
## 1 7 28 1640 0 1 No Yes
## 2 18 1 2800 1 0 Yes No
## 3 7 28 1660 0 0 No No
## 4 28 29 1850 0 1 No Yes
## 5 18 18 1640 1 1 Yes Yes
## 6 28 12 1770 0 1 No Yes
mydatat3$ID <- seq.int(nrow(mydatat3))
mydatat3 <- mydatat3[ , c(1,2,3,4,5,6,7,8)]
print(mydatat3)
## # A tibble: 85 Ã 8
## Age Distance Price Parking Balcony ParkingFactor BalconyFactor ID
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <fct> <int>
## 1 7 28 1640 0 1 No Yes 1
## 2 18 1 2800 1 0 Yes No 2
## 3 7 28 1660 0 0 No No 3
## 4 28 29 1850 0 1 No Yes 4
## 5 18 18 1640 1 1 Yes Yes 5
## 6 28 12 1770 0 1 No Yes 6
## 7 14 20 1850 0 1 No Yes 7
## 8 18 6 1970 1 1 Yes Yes 8
## 9 22 7 2270 1 0 Yes No 9
## 10 25 2 2570 1 0 Yes No 10
## # ⊠with 75 more rows
t.test(mydatat3$Price ,
mu = 1900 )
##
## One Sample t-test
##
## data: mydatat3$Price
## t = 2.9022, df = 84, p-value = 0.004731
## alternative hypothesis: true mean is not equal to 1900
## 95 percent confidence interval:
## 1937.443 2100.440
## sample estimates:
## mean of x
## 2018.941
We reject H0 at p-value p-value = 0.047 or 0.47 % taking 5 % as a cut point. It is unlikely that average price of an apartment is 1900 eur/m2.Therefore we accept the alternative hypothesis that says: âThe average price is not equal to 1900 eur.â.
With can say with 95 % confidence, that the average price is between 1937.4 eur and 2100.4 eur, which is higher than sugested by the null hypothesis.
Sample estimated mean is 2018.9 eur.
fit1 <- lm(Price ~ Age,
data = mydatat3)
summary(fit1)
##
## Call:
## lm(formula = Price ~ Age, data = mydatat3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -623.9 -278.0 -69.8 243.5 776.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2185.455 87.043 25.108 <2e-16 ***
## Age -8.975 4.164 -2.156 0.034 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 369.9 on 83 degrees of freedom
## Multiple R-squared: 0.05302, Adjusted R-squared: 0.04161
## F-statistic: 4.647 on 1 and 83 DF, p-value: 0.03401
Explanation:
Estimate of regression coefficient is -8.975, which means that with every additional year of age of apartment, average price decreases by 8.975 eur per m2, assuming that everything else remains unchanged (p < 0.05).
Coefficient of determination is 0.053, which means that 5.3 % of variability of Price is explained by the effect of Age.
sqrt(0.05302)
## [1] 0.2302607
Coefficient of correlation is -0.23, which tells us that the higher the age of apartment, the lower the price per m2.
library(car)
scatterplotMatrix(mydatat3 [ , c(3, 1, 2)],
smooth = FALSE)
According to the scatterplot matrix there is no sign of potential
problem with multicollinearty as values are distanced and scattered
around the line.
fit2 <- lm(Price ~ Age + Distance,
data = mydatat3)
summary(fit2)
##
## Call:
## lm(formula = Price ~ Age + Distance, data = mydatat3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -603.23 -219.94 -85.68 211.31 689.58
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2460.101 76.632 32.10 < 2e-16 ***
## Age -7.934 3.225 -2.46 0.016 *
## Distance -20.667 2.748 -7.52 6.18e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 286.3 on 82 degrees of freedom
## Multiple R-squared: 0.4396, Adjusted R-squared: 0.4259
## F-statistic: 32.16 on 2 and 82 DF, p-value: 4.896e-11
vif(fit2)
## Age Distance
## 1.001845 1.001845
mean(vif(fit2))
## [1] 1.001845
All values of VIF < 5 , therefore there is no multicolinearity or stron relationship between variables.
mydatat3$StdResid <- round(rstandard(fit2), 3)
head(mydatat3[order(-mydatat3$StdResid),])
## # A tibble: 6 Ã 9
## Age Distance Price Parking Balcony ParkingFactor BalconyFactor ID StdReâŠÂ¹
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <fct> <int> <dbl>
## 1 5 45 2180 1 1 Yes Yes 38 2.58
## 2 2 11 2790 1 0 Yes No 33 2.05
## 3 18 1 2800 1 0 Yes No 2 1.78
## 4 18 1 2800 1 1 Yes Yes 61 1.78
## 5 8 2 2820 1 0 Yes No 58 1.66
## 6 10 1 2810 0 0 No No 57 1.60
## # ⊠with abbreviated variable name ¹âStdResid
hist(mydatat3$StdResid,
xlab = "Standardized residuals",
ylab = "Frequency",
main = "Histogram of standardized residuals")
All values of standardized residuals are between -3 and 3 and therefore
we will not remove any unit.
mydatat3$CooksD <- round(cooks.distance(fit2), 3)
head(mydatat3[order(-mydatat3$CooksD),])
## # A tibble: 6 Ã 10
## Age Distance Price Parking Balcony ParkingFacâŠÂ¹ BalcoâŠÂ² ID StdReâŠÂ³ CooksD
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <fct> <int> <dbl> <dbl>
## 1 5 45 2180 1 1 Yes Yes 38 2.58 0.32
## 2 43 37 1740 0 0 No No 55 1.44 0.104
## 3 2 11 2790 1 0 Yes No 33 2.05 0.069
## 4 7 2 1760 0 1 No Yes 53 -2.15 0.066
## 5 37 3 2540 1 1 Yes Yes 22 1.58 0.061
## 6 40 2 2400 0 1 No Yes 39 1.09 0.038
## # ⊠with abbreviated variable names ¹âParkingFactor, ²âBalconyFactor, ³âStdResid
hist(mydatat3$CooksD,
xlab = "Cooks distance",
ylab = "Frequency",
main = "Histogram of Cooks distances")
We see there is a unit with significantly higher Cooks distance,
according to the histogram above. We see that the Cooks distance of 0.32
that is significantly higher belongs to the apartment with ID = 38. We
will remove this unit.
mydatat3 <- mydatat3[c(-38), ]
head(mydatat3[order(-mydatat3$CooksD),])
## # A tibble: 6 Ã 10
## Age Distance Price Parking Balcony ParkingFacâŠÂ¹ BalcoâŠÂ² ID StdReâŠÂ³ CooksD
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <fct> <int> <dbl> <dbl>
## 1 43 37 1740 0 0 No No 55 1.44 0.104
## 2 2 11 2790 1 0 Yes No 33 2.05 0.069
## 3 7 2 1760 0 1 No Yes 53 -2.15 0.066
## 4 37 3 2540 1 1 Yes Yes 22 1.58 0.061
## 5 40 2 2400 0 1 No Yes 39 1.09 0.038
## 6 8 2 2820 1 0 Yes No 58 1.66 0.037
## # ⊠with abbreviated variable names ¹âParkingFactor, ²âBalconyFactor, ³âStdResid
fit2 <- lm(Price ~ Age + Distance,
data = mydatat3)
mydatat3$StdfittedValues <- scale(fit2$fitted.values)
library(car)
scatterplot(y = mydatat3$StdResid, x= mydatat3$StdfittedValues,
xlab = "Standarized fitted values",
ylab = "Standarized residuals",
boxplots = FALSE,
regLine = FALSE,
smooth = FALSE)
According to the scatterplot we can not see that there is
heteroskedasticity. We run further testing with the Breusch Pagan
Test
library (olsrr)
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
##
## rivers
ols_test_breusch_pagan(fit2)
##
## Breusch Pagan Test for Heteroskedasticity
## -----------------------------------------
## Ho: the variance is constant
## Ha: the variance is not constant
##
## Data
## ---------------------------------
## Response : Price
## Variables: fitted values of Price
##
## Test Summary
## -----------------------------
## DF = 1
## Chi2 = 2.927455
## Prob > Chi2 = 0.08708469
We can see that p >0.05 and therefore we can not reject H0 that states that the variance is constant (homoskedasticity). Therefore we conclude that there is no heteroskedasticity.
hist(mydatat3$StdResid,
xlab = "Standardized residuals",
ylab = "Frequency",
main = "Histogram of standardized residuals")
shapiro.test(mydatat3$StdResid)
##
## Shapiro-Wilk normality test
##
## data: mydatat3$StdResid
## W = 0.94879, p-value = 0.002187
We can see from the histogram that the distribution of standardized residuals is not normal.
Shapiro-Wilk normallity test tested the H0: Variable is normally distributed H1: Variable is NOT normally distributed
With p-value: 0.0018 (p < 0.05) we can reject the null hypothesis and accept the alternative -> variable is NOT normally distributed.
fit2 <- lm(Price ~ Age + Distance,
data = mydatat3)
summary(fit2)
##
## Call:
## lm(formula = Price ~ Age + Distance, data = mydatat3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -604.92 -229.63 -56.49 192.97 599.35
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2456.076 73.931 33.221 < 2e-16 ***
## Age -6.464 3.159 -2.046 0.044 *
## Distance -22.955 2.786 -8.240 2.52e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 276.1 on 81 degrees of freedom
## Multiple R-squared: 0.4838, Adjusted R-squared: 0.4711
## F-statistic: 37.96 on 2 and 81 DF, p-value: 2.339e-12
sqrt(0.4831)
## [1] 0.695054
Explanation:
Estimate of regression coefficient for Age is -6.46, which means that with every additional year of age of apartment, average price decreases by -6.46 eur per m2, assuming that everything else remains unchanged (p < 0.05).
Estimate of regression coefficient for the explanatory variable Distance is -22.562, which means that with every 1 Km increase in Distance, the Price on average decreases by 22.562 eur per m2 (p < 0.001), assuming that all other variables remain unchanged.
Coefficient of determination is 0.4831, which tells us that 48.31 % of variability of Price is explained by linear effect of Age and Distance.
Multiple coefficient of correlation is -0.695, which means that linear relationship between Price and the two explanatory variables (Age and Distance) is medium strong.
fit3 <- lm(Price ~ Age + Distance + ParkingFactor + BalconyFactor,
data = mydatat3)
anova(fit2, fit3)
## Analysis of Variance Table
##
## Model 1: Price ~ Age + Distance
## Model 2: Price ~ Age + Distance + ParkingFactor + BalconyFactor
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 81 6176767
## 2 79 5654480 2 522287 3.6485 0.03051 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
H0: fit2 fits data better than fit3 H1: fit3 fits data better than fit2
p-value is 0.018, therefore we can reject H0 and accept H1 and so conclude that fit3 fits the data better than fit2.
summary(fit3)
##
## Call:
## lm(formula = Price ~ Age + Distance + ParkingFactor + BalconyFactor,
## data = mydatat3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -473.21 -192.37 -28.89 204.17 558.77
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2329.724 93.066 25.033 < 2e-16 ***
## Age -5.821 3.074 -1.894 0.06190 .
## Distance -20.279 2.886 -7.026 6.66e-10 ***
## ParkingFactorYes 167.531 62.864 2.665 0.00933 **
## BalconyFactorYes -15.207 59.201 -0.257 0.79795
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 267.5 on 79 degrees of freedom
## Multiple R-squared: 0.5275, Adjusted R-squared: 0.5035
## F-statistic: 22.04 on 4 and 79 DF, p-value: 3.018e-12
We can see that given the Age, Distance and Balcony, the apartments with Parking, are on average priced 180.399 eur higher per m2 than those without Parking. (p < 0.01).
Estimate of regression coefficient for the explanatory variable BalconyFactor is -26.178, but p-value is 0.66 (p > 0,05), therefore we can not say that Balcony ha affect on the Price.
ANOVA H0: R2 = 0 H1: R2 > 0
P-value: 3.018e-12 (p < 0.001). We reject H0 which means there is linear correlation between dependent (Price) and at least one explanatory variable. Model is good (R squared > 0)
mydatat3$FittedValues <- fitted.values(fit3)
mydatat3$Residuals <- residuals(fit3)
head(fit3$residuals)
## 1 2 3 4 5 6
## -65.95206 427.80292 -61.15894 286.56890 -372.24396 -138.17733