For Task 1 I used data set “Salaries” from library carData in R.
library(carData)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
mydata <- as.data.frame(carData::Salaries)
head(mydata)
## rank discipline yrs.since.phd yrs.service sex salary
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male 97000
str(mydata)
## 'data.frame': 397 obs. of 6 variables:
## $ rank : Factor w/ 3 levels "AsstProf","AssocProf",..: 3 3 1 3 3 2 3 3 3 3 ...
## $ discipline : Factor w/ 2 levels "A","B": 2 2 2 2 2 2 2 2 2 2 ...
## $ yrs.since.phd: int 19 20 4 45 40 6 30 45 21 18 ...
## $ yrs.service : int 18 16 3 39 41 6 23 45 20 18 ...
## $ sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 1 ...
## $ salary : int 139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...
Description of the variables:
rank (Assistant Professor, Associate Professor, Professor).
discipline (A = “theoretical” departments, B = “applied” departments).
yrs.since.phd = years since PhD.
yrs.service = years of service.
sex (Female or Male).
salary = nine-month salary, in dollars.
mydata$salary_thousands <- mydata$salary / 1000
head(mydata$salary_thousands)
## [1] 139.75 173.20 79.75 115.00 141.50 97.00
mydata <- mydata[ , !(names(mydata) %in% c("salary", "yrs.since.phd"))]
names(mydata)[names(mydata) == "yrs.service"] <- "years_service"
levels(mydata$discipline) <- c("Theoretical", "Applied")
mydata_fullprof <- subset(mydata, rank == "Prof")
#Three highest earning Full Professors
head((mydata_fullprof[order(mydata_fullprof$salary_thousands, decreasing = TRUE), -1]),3)
## discipline years_service sex salary_thousands
## 44 Applied 38 Male 231.545
## 365 Theoretical 43 Male 205.500
## 250 Theoretical 7 Male 204.000
library(pastecs)
##
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
##
## first, last
summary(mydata$years_service)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 7.00 16.00 17.61 27.00 60.00
Explanation:
round(stat.desc(mydata$salary_thousands),2)
## nbr.val nbr.null nbr.na min max range sum median mean
## 397.00 0.00 0.00 57.80 231.54 173.74 45141.46 107.30 113.71
## SE.mean CI.mean.0.95 var std.dev coef.var
## 1.52 2.99 917.43 30.29 0.27
Explanation:
The average nine-month salary of professors is 113.71 thousand USD.
The standard deviation is 30.29 thousand USD, which shows the typical deviation of individual salaries from the mean.
By applying the empirical rule (mean ± 1, 2, and 3 standard deviations), we find that most professors (≈68.27%) earn between 83k and 144k USD. Almost all (≈95.45%) are within 53k and 174k, and practically the entire data set (≈99.73%) falls between 23k and 205k USD.
nrow(mydata_fullprof)
## [1] 266
#Counts and percentages of full professors by sex
mydata_fullprof %>%
count(sex) %>%
mutate(percent = round(100 * n / sum(n), 1))
## sex n percent
## 1 Female 18 6.8
## 2 Male 248 93.2
Explanation:
df_gender <- mydata_fullprof %>%
count(sex) %>%
mutate(percent = round(100 * n / sum(n), 1))
ggplot(df_gender, aes(x = sex, y = percent, fill = sex)) +
geom_col(width = 0.6, alpha = 0.8) +
geom_text(aes(label = paste0(percent, "%")),
vjust = -0.5, size = 5, fontface = "bold") +
scale_fill_brewer(palette = "Pastel1") +
scale_y_continuous(limits = c(0, 105)) + #extend axis to show labels
labs(title = "Gender Distribution of Full Professors",
x = "", y = "Percentage") +
theme_minimal(base_size = 13) +
theme(legend.position = "none",
plot.title = element_text(hjust = 0.5)) #center title
Explanation:
library(scales)
ggplot(mydata_fullprof, aes(x = salary_thousands)) +
geom_histogram(binwidth = 5, fill = "palegreen3", color = "white", alpha = 0.9) +
geom_density(aes(y = after_stat(count) * 5), color = "black", linewidth = 1) +
geom_vline(xintercept = mean(mydata_fullprof$salary_thousands, na.rm = TRUE),
color = "navyblue", linewidth = 1) + #vertical line for mean
geom_vline(xintercept = median(mydata_fullprof$salary_thousands, na.rm = TRUE),
color = "red3", linewidth = 1, linetype = "dashed") + #dashed line for median
annotate("text",
x = mean(mydata_fullprof$salary_thousands, na.rm = TRUE), y = Inf, vjust = 1.6,
label = paste0("Mean = ", round(mean(mydata_fullprof$salary_thousands), 1), "k"),
color = "navyblue", fontface = "bold") + #annotating mean line
annotate("text",
x = median(mydata_fullprof$salary_thousands, na.rm = TRUE), y = Inf, vjust = 3.2,
label = paste0("Median = ", round(median(mydata_fullprof$salary_thousands), 1), "k"),
color = "red3", fontface = "bold") + #annotating median line
labs(title = "Distribution of Nine-Month Salary for Full Professors",
x = "Salary (thousands USD)", y = "Count") +
theme_minimal(base_size = 12) +
theme(plot.title = element_text(hjust = 0.5)) +
coord_cartesian(ylim = c(0, NA), expand = FALSE) + #fix y-axis so bars/text are fully visible
scale_y_continuous(expand = expansion(mult = c(0.02, 0.12)))
Explanation:
ggplot(mydata, aes(x = years_service, y = salary_thousands, color = rank)) +
geom_point(alpha = 0.55, size = 2) +
geom_smooth(aes(group = 1), method = "lm", se = FALSE, color = "black", linewidth = 1) + #overall trend
scale_color_brewer(palette = "Set1") +
labs(title = "Years of Service vs Salary (overall trend in black)",
x = "Years of Service", y = "Salary (thousands USD)", color = "Rank") +
theme_minimal(base_size = 12) +
theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula = 'y ~ x'
Explanation:
library(readxl)
mydata1 <- read_excel("~/Desktop/IMB/Bootcamp/Statistics + R/R working directory/Bootcamp_25_26/R Take Home Exam 2025/Task 2/Business School.xlsx")
head(as.data.frame(mydata1), 3) #convert tibble to data.frame so all columns show in knitted output
## Student ID Undergrad Degree Undergrad Grade MBA Grade Work Experience Employability (Before) Employability (After)
## 1 1 Business 68.4 90.2 No 252 276
## 2 2 Computer Science 70.2 68.7 Yes 101 119
## 3 3 Finance 76.4 83.3 No 401 462
## Status Annual Salary
## 1 Placed 111000
## 2 Placed 107000
## 3 Placed 109000
Description of the variables:
Student ID: ID number of the student.
Undergrad Degree: Undergraduate degree of the student.
Undegrad Grade: Undergraduate grade/percentage.
MBA Grade: MBA grade of the student.
Work Experience: Prior work experience (Yes/No).
Employability (Before): Employability score before MBA.
Employability (After): Employability score after MBA.
Status: Placement status (Placed/Not Placed).
Annual Salary: Annual salary after MBA (USD).
#Clean column names (spaces & parentheses -> underscores)
names(mydata1) <- tolower(gsub("[()]", "", gsub("\\s+", "_", names(mydata1))))
names(mydata1)[names(mydata1) == "undergrad_degree"] <- "ug_degree"
names(mydata1)[names(mydata1) == "undergrad_grade"] <- "ug_grade"
mydata1$ug_degree <- factor(mydata1$ug_degree)
#Check for missing values in the variable we will plot
colSums(is.na(mydata1["ug_degree"]))
## ug_degree
## 0
head(as.data.frame(mydata1), 3)
## student_id ug_degree ug_grade mba_grade work_experience employability_before employability_after status
## 1 1 Business 68.4 90.2 No 252 276 Placed
## 2 2 Computer Science 70.2 68.7 Yes 101 119 Placed
## 3 3 Finance 76.4 83.3 No 401 462 Placed
## annual_salary
## 1 111000
## 2 107000
## 3 109000
#Counts + percentages
deg_tab <- mydata1 |>
dplyr::count(ug_degree) |>
dplyr::mutate(percent = round(100 * n / sum(n), 1))
deg_tab
## # A tibble: 5 × 3
## ug_degree n percent
## <fct> <int> <dbl>
## 1 Art 6 6
## 2 Business 35 35
## 3 Computer Science 25 25
## 4 Engineering 9 9
## 5 Finance 25 25
ggplot(deg_tab, aes(x = reorder(ug_degree, -percent), y = percent, fill = ug_degree)) + #reorder highest to lowest
geom_col(width = 0.7, alpha = 0.9) +
geom_text(aes(label = paste0(percent, "%")), vjust = -0.5, fontface = "bold") + #adds % labels above bars
scale_fill_brewer(palette = "Pastel1") +
labs(title = "Distribution of Undergrad Degrees",
x = "Undergrad degree", y = "Percentage") +
theme_minimal(base_size = 12) +
theme(legend.position = "none",
plot.title = element_text(hjust = 0.5)) +
scale_y_continuous(limits = c(0, max(deg_tab$percent) * 1.15))
The most common undergraduate degree is Business (35%).
summary(mydata1$annual_salary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20000 87125 103500 109058 124000 340000
round(pastecs::stat.desc(mydata1$annual_salary), 1)
## nbr.val nbr.null nbr.na min max range sum median mean
## 100.0 0.0 0.0 20000.0 340000.0 320000.0 10905800.0 103500.0 109058.0
## SE.mean CI.mean.0.95 var std.dev coef.var
## 4150.1 8234.8 1722373474.7 41501.5 0.4
ggplot(mydata1, aes(x = annual_salary)) +
geom_histogram(binwidth = 5000, fill = "pink", color = "white", alpha = 0.9) +
geom_density(aes(y = after_stat(count) * 5000), color = "black", linewidth = 1) + #density curve
geom_vline(xintercept = mean(mydata1$annual_salary, na.rm = TRUE),
color = "navyblue", linewidth = 1) + #mean (blue)
geom_vline(xintercept = median(mydata1$annual_salary, na.rm = TRUE),
color = "red3", linewidth = 1, linetype = "dashed") + #median (dashed red)
annotate("text",
x = mean(mydata1$annual_salary, na.rm = TRUE), y = Inf, vjust = 1.6,
label = paste0("Mean = ", format(round(mean(mydata1$annual_salary), 0), big.mark = ",")),
color = "navyblue", fontface = "bold") +
annotate("text",
x = median(mydata1$annual_salary, na.rm = TRUE), y = Inf, vjust = 3.2,
label = paste0("Median = ", format(round(median(mydata1$annual_salary), 0), big.mark = ",")),
color = "red3", fontface = "bold") +
labs(title = "Distribution of Annual Salary",
x = "Annual salary (USD)", y = "Count") +
theme_minimal(base_size = 12) +
theme(plot.title = element_text(hjust = 0.5)) +
coord_cartesian(ylim = c(0, NA), expand = FALSE) +
scale_x_continuous(labels = comma) #fix axis labels
Description of distribution:
H0: Mu equal to 74
H1: Mu not equal to 74
t.test(mydata1$mba_grade, mu = 74)
##
## One Sample t-test
##
## data: mydata1$mba_grade
## t = 2.6587, df = 99, p-value = 0.00915
## alternative hypothesis: true mean is not equal to 74
## 95 percent confidence interval:
## 74.51764 77.56346
## sample estimates:
## mean of x
## 76.04055
Since the p-value (0.00915) is below 0.05, we reject the null hypothesis and conclude that there is a statistically significant difference. The average MBA grade this year (M = 76.04) is significantly higher than last year’s mean of 74.
library(effectsize)
cohens_d(mydata1$mba_grade, mu = 74)
## Cohen's d | 95% CI
## ------------------------
## 0.27 | [0.07, 0.46]
##
## - Deviation from a difference of 74.
interpret_cohens_d(0.27, rules = "sawilowsky2009")
## [1] "small"
## (Rules: sawilowsky2009)
The effect size (d = 0.27) is small, showing that the difference, while statistically significant, is modest in practical terms.
mydata2 <- read_xlsx("~/Desktop/IMB/Bootcamp/Statistics + R/R working directory/Bootcamp_25_26/R Take Home Exam 2025/Task 3/Apartments.xlsx")
str(mydata2)
## tibble [85 × 5] (S3: tbl_df/tbl/data.frame)
## $ Age : num [1:85] 7 18 7 28 18 28 14 18 22 25 ...
## $ Distance: num [1:85] 28 1 28 29 18 12 20 6 7 2 ...
## $ Price : num [1:85] 1640 2800 1660 1850 1640 1770 1850 1970 2270 2570 ...
## $ Parking : num [1:85] 0 1 0 0 1 0 0 1 1 1 ...
## $ Balcony : num [1:85] 1 0 0 1 1 1 1 1 0 0 ...
head((mydata2),3)
## # A tibble: 3 × 5
## Age Distance Price Parking Balcony
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 7 28 1640 0 1
## 2 18 1 2800 1 0
## 3 7 28 1660 0 0
Description:
mydata2$Parking <- factor(mydata2$Parking,
levels = c(0,1),
labels = c("No", "Yes"))
mydata2$Balcony <- factor(mydata2$Balcony,
levels = c(0,1),
labels = c("No", "Yes"))
summary(mydata2)
## Age Distance Price Parking Balcony
## Min. : 1.00 Min. : 1.00 Min. :1400 No :42 No :48
## 1st Qu.:12.00 1st Qu.: 4.00 1st Qu.:1710 Yes:43 Yes:37
## Median :18.00 Median :12.00 Median :1950
## Mean :18.55 Mean :14.22 Mean :2019
## 3rd Qu.:24.00 3rd Qu.:20.00 3rd Qu.:2290
## Max. :45.00 Max. :45.00 Max. :2820
t.test(mydata2$Price, mu = 1900)
##
## One Sample t-test
##
## data: mydata2$Price
## t = 2.9022, df = 84, p-value = 0.004731
## alternative hypothesis: true mean is not equal to 1900
## 95 percent confidence interval:
## 1937.443 2100.440
## sample estimates:
## mean of x
## 2018.941
Since the p-value (0.0047) is below 0.05, we reject the null hypothesis and conclude that there is a statistically significant difference. The average apartment price per m² (M = 2018.94 €) is significantly higher than the hypothesized value of 1900 €.
cohens_d(mydata2$Price, mu = 1900)
## Cohen's d | 95% CI
## ------------------------
## 0.31 | [0.10, 0.53]
##
## - Deviation from a difference of 1900.
interpret_cohens_d(0.31, rules = "sawilowsky2009")
## [1] "small"
## (Rules: sawilowsky2009)
The effect size (d = 0.31) is small, meaning the difference, while statistically significant, is modest in practical terms.
fit1 <- lm(Price ~ Age,
data = mydata2)
summary(fit1)
##
## Call:
## lm(formula = Price ~ Age, data = mydata2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -623.9 -278.0 -69.8 243.5 776.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2185.455 87.043 25.108 <2e-16 ***
## Age -8.975 4.164 -2.156 0.034 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 369.9 on 83 degrees of freedom
## Multiple R-squared: 0.05302, Adjusted R-squared: 0.04161
## F-statistic: 4.647 on 1 and 83 DF, p-value: 0.03401
cor(mydata2$Price, mydata2$Age)
## [1] -0.230255
Explanation:
Regression coefficient estimate (b₁ = –8.98): If age of an apartment increases by 1 year, the price per m² goes down by 9 years on average.
Coefficient of correlation (r = -0.23): Linear relationship between Price and Age is weak and negative.
Coefficient of determination (R² = 0.053): About 5.3% of the variability of Price is explained by the linear effect of Age. This suggests that age alone explains very little of the price differences and most variation comes from other factors.
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
scatterplotMatrix(mydata2[, c(-4, -5)],
smooth = FALSE)
Multicolinearity:
fit2 <- lm(Price ~ Age + Distance,
data = mydata2)
summary(fit2)
##
## Call:
## lm(formula = Price ~ Age + Distance, data = mydata2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -603.23 -219.94 -85.68 211.31 689.58
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2460.101 76.632 32.10 < 2e-16 ***
## Age -7.934 3.225 -2.46 0.016 *
## Distance -20.667 2.748 -7.52 6.18e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 286.3 on 82 degrees of freedom
## Multiple R-squared: 0.4396, Adjusted R-squared: 0.4259
## F-statistic: 32.16 on 2 and 82 DF, p-value: 4.896e-11
Explanation:
If age of an apartment increeass by one year, the price goes down by 7.9 years on average at p < 0.05, controlling for all other variables.
If distance from city center increases by 1 km, the price goes down by 20.7 years on average at p < 0.001, controlling for all other variables.
43.96% of variability in price is explained by the linear effect of age and distance.
vif(fit2)
## Age Distance
## 1.001845 1.001845
mean(vif(fit2))
## [1] 1.001845
Explanation:
mydata2$StdResid <- round(rstandard(fit2),3)
mydata2$CooksD <- round(cooks.distance(fit2), 3)
hist(mydata2$StdResid,
xlab = "Standardized residuals",
main = "Histogram of standardized residuals",
col = "pink2")
head(mydata2[order(mydata2$StdResid), ], 3)
## # A tibble: 3 × 7
## Age Distance Price Parking Balcony StdResid CooksD
## <dbl> <dbl> <dbl> <fct> <fct> <dbl> <dbl>
## 1 7 2 1760 No Yes -2.15 0.066
## 2 12 14 1650 No Yes -1.50 0.013
## 3 12 14 1650 No No -1.50 0.013
Interpretation:
hist(mydata2$CooksD,
xlab = "Cook's distance",
main = "Histogram of Cook's distances",
col = "paleturquoise3")
head(mydata2[order(-mydata2$CooksD),], 6)
## # A tibble: 6 × 7
## Age Distance Price Parking Balcony StdResid CooksD
## <dbl> <dbl> <dbl> <fct> <fct> <dbl> <dbl>
## 1 5 45 2180 Yes Yes 2.58 0.32
## 2 43 37 1740 No No 1.44 0.104
## 3 2 11 2790 Yes No 2.05 0.069
## 4 7 2 1760 No Yes -2.15 0.066
## 5 37 3 2540 Yes Yes 1.58 0.061
## 6 40 2 2400 No Yes 1.09 0.038
threshold <- 0.06 #threshold of 0.06 for Cook’s distance, based on visible gaps in the distribution or formula 4/n (4/85 = 0.047)
mydata2 <- mydata2[mydata2$CooksD <= threshold, ]
hist(mydata2$CooksD,
xlab = "Cook's distance",
main = "Histogram of Cook's distances",
col = "paleturquoise3")
Interpretation:
fit2 <- lm(Price ~ Age + Distance, data = mydata2)
mydata2$StdResid <- rstandard(fit2)
mydata2$StdFitted <- as.numeric(scale(fit2$fitted.values))
library(car)
scatterplot(y = mydata2$StdResid, x = mydata2$StdFitted,
ylab = "Standardized residuals",
xlab = "Standardized fitted values",
boxplots = FALSE, regLine = FALSE, smooth = FALSE)
Explanation:
hist(mydata2$StdResid,
xlab = "Standardized residuals",
ylab = "Frequency",
main = "Histogram of standardized residuals",
col = "pink2")
shapiro.test(mydata2$StdResid)
##
## Shapiro-Wilk normality test
##
## data: mydata2$StdResid
## W = 0.94156, p-value = 0.001168
Explanation: - The histogram of standardized residuals does not resemble a perfectly bell-shaped curve, indicating some deviation from normality.
fit2 <- lm(Price ~ Age + Distance, data = mydata2)
summary(fit2)
##
## Call:
## lm(formula = Price ~ Age + Distance, data = mydata2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -411.50 -203.69 -45.24 191.11 492.56
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2502.467 75.024 33.356 < 2e-16 ***
## Age -8.674 3.221 -2.693 0.00869 **
## Distance -24.063 2.692 -8.939 1.57e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 256.8 on 77 degrees of freedom
## Multiple R-squared: 0.5361, Adjusted R-squared: 0.524
## F-statistic: 44.49 on 2 and 77 DF, p-value: 1.437e-13
Explanation:
If the age of an apartment increases by one year, the price decreases on average by 8.67 €/m² at p < 0.01, controlling for distance.
If the distance from the city center increases by one kilometer, the price decreases on average by 24.06 €/m² at p < 0.001, controlling for age.
About 53.61% of the variability in price is explained by the linear effect of age and distance (the explanatory power (R²) improved from 43.9% to 53.6%, showing that removing influential outliers strengthened the model.)
fit3 <- lm(Price ~ Age + Distance + Parking + Balcony, data = mydata2)
Note:
anova(fit2, fit3)
## Analysis of Variance Table
##
## Model 1: Price ~ Age + Distance
## Model 2: Price ~ Age + Distance + Parking + Balcony
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 77 5077362
## 2 75 4791128 2 286234 2.2403 0.1135
Explanation:
summary(fit3)
##
## Call:
## lm(formula = Price ~ Age + Distance + Parking + Balcony, data = mydata2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -390.93 -198.19 -53.64 186.73 518.34
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2393.316 93.930 25.480 < 2e-16 ***
## Age -7.970 3.191 -2.498 0.0147 *
## Distance -21.961 2.830 -7.762 3.39e-11 ***
## ParkingYes 128.700 60.801 2.117 0.0376 *
## BalconyYes 6.032 57.307 0.105 0.9165
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 252.7 on 75 degrees of freedom
## Multiple R-squared: 0.5623, Adjusted R-squared: 0.5389
## F-statistic: 24.08 on 4 and 75 DF, p-value: 7.764e-13
Explanation of regression coefficients:
Parking: Given the age, distance and balcony presence, apartments with parking have a price that is 128.7 € higher on average than apartments without parking (p < 0.05).
Balcony: Given the age, distance and parking availability, apartments with a balcony have a price that is 6.03 € higher on average than apartments without a balcony, but this is not statistically significant (p > 0.05).
F statistic hypothesis:
H₀: ρ² = 0 (or equivalently β₁ = β₂ = … = βₖ = 0). This means that none of the explanatory variables (Age, Distance, Parking, Balcony) have an effect on apartment price, and the model does not explain any variability.
H₁: ρ² > 0 (or at least one βⱼ ≠ 0). This means that at least one explanatory variable does have an effect, and the model explains part of the variability in price.
Since p < 0.001, we reject H₀ and conclude the model is overall significant
mydata2$FittedValues <- fitted.values(fit3)
mydata2$Residuals <- residuals(fit3)
#Show the 2nd row (apartment ID2)
round(mydata2[2, c("Price", "FittedValues", "Residuals")],2)
## # A tibble: 1 × 3
## Price FittedValues Residuals
## <dbl> <dbl> <dbl>
## 1 2800 2357. 443.