#rm(list=ls())
# Pin the CRAN mirror so that package installation also works in
# non-interactive runs.
options(repos = c(CRAN = "https://cran.rstudio.com/"))

# Packages this analysis depends on.
required_packages <- c(
  "ggplot2", "dplyr", "randomForest", "performance", "skimr",
  "corrplot", "GGally", "patchwork", "scales", "ggdist"
)

# Install only the packages that are not available yet. Reinstalling
# everything on each run is slow, and the recorded output of earlier runs
# showed "Permission denied" warnings for dplyr, randomForest and ggdist when
# their installed DLLs could not be replaced (presumably because the packages
# were in use at that moment).
# system.file() returns "" for a package that is not installed and does not
# load the package's namespace.
is_installed <- vapply(
  required_packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
#Gleiches für die Bibliotheken
library(ggplot2)
library(dplyr)
##
## Attache Paket: 'dplyr'
## Die folgenden Objekte sind maskiert von 'package:stats':
##
## filter, lag
## Die folgenden Objekte sind maskiert von 'package:base':
##
## intersect, setdiff, setequal, union
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attache Paket: 'randomForest'
## Das folgende Objekt ist maskiert 'package:dplyr':
##
## combine
## Das folgende Objekt ist maskiert 'package:ggplot2':
##
## margin
library(performance)
library(skimr)
library(corrplot)
## corrplot 0.95 loaded
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(patchwork)
library(scales)
library(ggdist)
# Report the working directory; the relative data path used below is resolved
# against it.
getwd()

# Load the raw housing data. According to the str() output of the recorded
# run, the file holds 545 observations of 13 variables.
housing <- read.csv(file = "Housing.csv")

# Structure of the raw data. In the recorded run, price, area, bedrooms,
# bathrooms, stories and parking were read as integers, while mainroad,
# guestroom, basement, hotwaterheating, airconditioning, prefarea and
# furnishingstatus were read as character columns.
str(object = housing)

# Per-column summary statistics. In the recorded run, price ranged from
# 1,750,000 to 13,300,000 (median 4,340,000) and area from 1,650 to 16,200
# (median 4,600).
summary(object = housing)
# All further work is done on the copy "h1"; "housing" keeps the raw data.
h1 <- housing

# Count missing values per column. The recorded run reported 0 missing values
# for every one of the 13 columns.
h1 %>%
  is.na() %>%
  colSums()
# Data-quality overview with skimr. The package is already attached in the
# library() header of this script, so it is not loaded a second time here.
# skim() output is converted to a plain tibble so that it prints as a regular
# table. In the recorded run this was a 13 x 17 tibble (7 character and
# 6 numeric variables, n_missing = 0 and complete_rate = 1 for all of them).
h1_skim_tibble <- tibble::as_tibble(skim(h1))
print(h1_skim_tibble)
# Adjust variable types: convert the seven categorical text columns to
# factors in a single step.
h1 <- h1 %>%
  mutate(
    across(
      all_of(c(
        "mainroad", "guestroom", "basement", "hotwaterheating",
        "airconditioning", "prefarea", "furnishingstatus"
      )),
      as.factor
    )
  )
# Structure of the data after the type conversion. In the recorded run the
# six yes/no columns were factors with the 2 levels "no" and "yes", and
# furnishingstatus was a factor with 3 levels.
str(object = h1)

# Summary of the converted data; factor columns are now reported as counts
# per level (e.g. mainroad: no = 77, yes = 468 in the recorded run).
summary(object = h1)

# Summary restricted to the numeric variables.
h1 %>%
  select(price, area, bedrooms, bathrooms, stories, parking) %>%
  summary()
# Data set holding only the numeric variables.
# select(where(...)) replaces the superseded select_if().
h1_num <- select(h1, where(is.numeric))

# Correlation matrix of the numeric variables, printed unrounded. In the
# recorded run, price correlated most strongly with area (0.54) and
# bathrooms (0.52).
cor(h1_num)

# Rounded copy used for the plot labels.
h1_corr_mat <- round(cor(h1_num), 2)

# Correlation matrix as a coloured tile plot with the coefficients printed in
# each cell. cl.pos = "n" removes the colour legend.
corrplot(
  h1_corr_mat,
  method = "color",
  title = "Korrelationsmatrix",
  addCoef.col = "black",
  number.cex = 0.8,
  tl.col = "black",
  tl.srt = 45,
  tl.cex = 1,
  mar = c(1, 1, 2, 1),
  cl.pos = "n"
)

# Pairwise plot matrix of all numeric variables.
GGally::ggpairs(h1_num)

# Note: when a plot is run from the console, it is shown in the RStudio
# "Plots" pane (bottom right) and can be saved from there.
# Scatter plot: area vs. price.
ggplot(h1, aes(x = area, y = price)) +
  geom_point(color = "skyblue") +
  theme_minimal() +
  scale_y_continuous(labels = dollar_format()) +
  theme(
    plot.title = element_text(hjust = 0, size = 10, face = "bold"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0 and raised a warning in earlier runs.
    axis.line.x = element_line(color = "black", linewidth = 0.2),
    axis.line.y = element_line(color = "black", linewidth = 0.2),
    axis.text.x = element_text(size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  ) +
  labs(title = "Scatter Plot of Area vs. Price", x = "", y = "")

# Further analysis of "furnishingstatus".
# Bar chart: number of houses per furnishing status. ggplot2 is already
# attached in the library() header, so it is not loaded again here.
ggplot(h1, aes(x = furnishingstatus, fill = furnishingstatus)) +
  geom_bar() +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0, size = 10, face = "bold"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    axis.line.x = element_line(color = "black", linewidth = 0.2),
    axis.line.y = element_line(color = "black", linewidth = 0.2),
    axis.text.x = element_text(size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    legend.position = "none"
  ) +
  labs(title = "Distribution of Furnishing Status", x = "", y = "")

# Price and area statistics per furnishing status: group size plus mean,
# median and standard deviation of both variables. The .names pattern yields
# the columns mean_price, median_price, sd_price, mean_area, median_area and
# sd_area, in that order.
# Recorded run: furnished (n = 140) had the highest mean price (5,495,696),
# followed by semi-furnished (n = 227; 4,907,524) and unfurnished (n = 178;
# 4,013,831).
summarise(
  group_by(h1, furnishingstatus),
  count = n(),
  across(
    c(price, area),
    list(mean = mean, median = median, sd = sd),
    .names = "{.fn}_{.col}"
  )
)
# Box plot: prices by furnishing status, with the overall mean price drawn as
# a dashed red reference line.
ggplot(h1, aes(x = furnishingstatus, y = price, fill = furnishingstatus)) +
  geom_boxplot() +
  scale_y_continuous(labels = dollar_format()) +
  # The mean is passed as a fixed parameter rather than through aes(); mapping
  # it inside aes() would draw one identical line per data row.
  geom_hline(
    yintercept = mean(h1$price),
    colour = "red",
    linetype = "dashed",
    linewidth = 0.5
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0, size = 10, face = "bold"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    axis.line.x = element_line(color = "black", linewidth = 0.2),
    axis.line.y = element_line(color = "black", linewidth = 0.2),
    axis.text.x = element_text(size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    legend.position = "none"
  ) +
  labs(title = "Housing Prices by Furnishing Status", x = "", y = "")

# Area by furnishing status: half-eye density slab, box plot with error bars
# and the jittered raw observations layered on top of each other.
ggplot(data = h1, mapping = aes(x = furnishingstatus, y = area)) +
  # Density slab only: .width = 0 and point_colour = NA hide the interval and
  # the point estimate.
  ggdist::stat_halfeye(
    aes(slab_colour = furnishingstatus),
    adjust = 0.6,
    justification = -0.25,
    .width = 0,
    point_colour = NA
  ) +
  stat_boxplot(
    geom = "errorbar",
    width = 0.4,
    position = position_dodge(width = 0.1),
    alpha = 0.5
  ) +
  # x and y are inherited from the plot-level mapping.
  geom_boxplot(
    aes(fill = furnishingstatus),
    width = 0.4,
    position = position_dodge(width = 0.1),
    show.legend = FALSE
  ) +
  # Argument names are spelled out instead of relying on partial matching
  # (w/h).
  geom_jitter(
    color = "black",
    size = 1,
    position = position_jitter(width = 0.1, height = 0.1)
  ) +
  labs(title = "Housing Area by Furnishing Status", x = "", y = "") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0, size = 10, face = "bold"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    axis.line.x = element_line(color = "black", linewidth = 0.2),
    axis.line.y = element_line(color = "black", linewidth = 0.2),
    axis.text.x = element_text(size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    legend.position = "none"
  )

# Price by furnishing status: half-eye density slab, box plot with error bars
# and the jittered raw observations layered on top of each other.
ggplot(data = h1, mapping = aes(x = furnishingstatus, y = price)) +
  # Density slab only: .width = 0 and point_colour = NA hide the interval and
  # the point estimate.
  ggdist::stat_halfeye(
    aes(slab_colour = furnishingstatus),
    adjust = 0.6,
    justification = -0.25,
    .width = 0,
    point_colour = NA
  ) +
  stat_boxplot(
    geom = "errorbar",
    width = 0.4,
    position = position_dodge(width = 0.1),
    alpha = 0.5
  ) +
  scale_y_continuous(labels = dollar_format()) +
  # x and y are inherited from the plot-level mapping.
  geom_boxplot(
    aes(fill = furnishingstatus),
    width = 0.4,
    position = position_dodge(width = 0.1),
    show.legend = FALSE
  ) +
  # Argument names are spelled out instead of relying on partial matching
  # (w/h).
  geom_jitter(
    color = "black",
    size = 1,
    position = position_jitter(width = 0.1, height = 0.1)
  ) +
  labs(title = "Housing Prices by Furnishing Status", x = "", y = "") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0, size = 10, face = "bold"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    axis.line.x = element_line(color = "black", linewidth = 0.2),
    axis.line.y = element_line(color = "black", linewidth = 0.2),
    axis.text.x = element_text(size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    legend.position = "none"
  )

# Scatter plot: price vs. area, coloured by furnishing status.
ggplot(h1, aes(x = area, y = price, color = furnishingstatus)) +
  geom_point() +
  scale_y_continuous(labels = dollar_format()) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0, size = 10, face = "bold"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    axis.line.x = element_line(color = "black", linewidth = 0.2),
    axis.line.y = element_line(color = "black", linewidth = 0.2),
    axis.text.x = element_text(size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  ) +
  labs(
    title = "Scatter Plot of Price vs. Area by Furnishing Status",
    x = "",
    y = ""
  )

# Continue with "price".
# Histogram of the house prices, using bins of 500,000.
ggplot(h1, aes(x = price)) +
  geom_histogram(binwidth = 500000, fill = "skyblue", color = "black") +
  scale_x_continuous(labels = dollar_format()) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0, size = 10, face = "bold"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    axis.line.x = element_line(color = "black", linewidth = 0.2),
    axis.line.y = element_line(color = "black", linewidth = 0.2),
    axis.text.x = element_text(size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black")
  ) +
  labs(title = "Histogram of Housing Prices", x = "", y = "")

# Further EDA on "mainroad".
# Price and area statistics per main-road access: group size plus mean,
# median and standard deviation of both variables. The .names pattern yields
# the columns mean_price, median_price, sd_price, mean_area, median_area and
# sd_area, in that order.
# Recorded run: houses without main-road access (n = 77) had a mean price of
# 3,398,905, houses with access (n = 468) a mean price of 4,991,777.
summarise(
  group_by(h1, mainroad),
  count = n(),
  across(
    c(price, area),
    list(mean = mean, median = median, sd = sd),
    .names = "{.fn}_{.col}"
  )
)
# Price statistics per combination of furnishing status and main-road access.
# .groups = "drop" returns an ungrouped table and avoids the "summarise() has
# grouped output by ..." message shown in earlier runs.
# Recorded run: six combinations; the smallest cell was furnished houses
# without main-road access (n = 9), the largest semi-furnished houses with
# access (n = 196).
h1 %>%
  group_by(furnishingstatus, mainroad) %>%
  summarise(
    count = n(),
    mean_price = mean(price),
    median_price = median(price),
    sd_price = sd(price),
    .groups = "drop"
  )
# Scatter plot: price vs. area, coloured by main-road access.
ggplot(h1, aes(x = area, y = price, color = mainroad)) +
  geom_point() +
  scale_y_continuous(labels = dollar_format()) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0, size = 10, face = "bold"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    axis.line.x = element_line(color = "black", linewidth = 0.2),
    axis.line.y = element_line(color = "black", linewidth = 0.2),
    axis.text.x = element_text(size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  ) +
  labs(
    title = "Scatter Plot of Price vs. Area by Main Road Access",
    x = "",
    y = ""
  )

# Remaining attributes: one price box plot per attribute, combined into a
# single two-column figure.

#' Box plot of house price grouped by one attribute
#'
#' All ten attribute plots share the same layers and theme, so they are built
#' by this helper instead of ten copies of the same code.
#'
#' @param data Data frame containing a `price` column and the column named by
#'   `x_var`.
#' @param x_var Name (string) of the grouping column. The column is wrapped in
#'   `factor()` so that numeric counts (e.g. bathrooms) are treated as
#'   discrete groups; for columns that are already factors with all levels in
#'   use this leaves the levels unchanged.
#' @param title Plot title.
#' @return A ggplot object.
price_boxplot <- function(data, x_var, title) {
  ggplot(
    data,
    aes(
      x = factor(.data[[x_var]]),
      y = price,
      fill = factor(.data[[x_var]])
    )
  ) +
    geom_boxplot() +
    # Show prices in millions, e.g. "$ 5 Mio".
    scale_y_continuous(labels = function(x) paste0("$ ", x / 1e6, " Mio")) +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0, size = 10, face = "bold"),
      # `linewidth` replaces the `size` argument of element_line(), which is
      # deprecated as of ggplot2 3.4.0.
      axis.line.x = element_line(color = "black", linewidth = 0.2),
      axis.line.y = element_line(color = "black", linewidth = 0.2),
      axis.text.x = element_text(size = 10, color = "black"),
      axis.text.y = element_text(size = 10, color = "black"),
      legend.position = "none"
    ) +
    labs(title = title, x = "", y = "")
}

# NOTE(review): the single-letter names are kept because code outside this
# section may refer to them. Be aware that `F` masks base::F (the shorthand
# for FALSE) and that `C`, `D` and `I` share their names with base/stats
# functions; descriptive names would be safer.
# Box plot: bathrooms
A <- price_boxplot(h1, "bathrooms", "Housing Prices by Number of Bathrooms")
# Box plot: stories
B <- price_boxplot(h1, "stories", "Housing Prices by Number of Stories")
# Box plot: mainroad
C <- price_boxplot(h1, "mainroad", "Housing Prices by Main Road Access")
# Box plot: guestroom
D <- price_boxplot(
  h1, "guestroom", "Housing Prices by Guest Room Availability"
)
# Box plot: basement
E <- price_boxplot(h1, "basement", "Housing Prices by Basement Availability")
# Box plot: hotwaterheating
F <- price_boxplot(
  h1, "hotwaterheating", "Housing Prices by Hot Water Heating"
)
# Box plot: airconditioning
G <- price_boxplot(
  h1, "airconditioning", "Housing Prices by Air Conditioning Availability"
)
# Box plot: parking
H <- price_boxplot(
  h1, "parking", "Housing Prices by Number of Parking Spaces"
)
# Box plot: bedrooms
I <- price_boxplot(h1, "bedrooms", "Housing Prices by Number of Bedrooms")
# Box plot: prefarea
J <- price_boxplot(h1, "prefarea", "Housing Prices by preferring area")

# Combine all ten plots with patchwork (already attached in the library()
# header) into a two-column layout with Roman-numeral tags.
# TODO: adjustments are still needed here - see the first file for how to
# create PDFs and similar output. The title "xx" is a placeholder.
patch <- (J / C / B / D / H / E / A / F / I / G) +
  plot_layout(ncol = 2, widths = c(1, 1)) +
  plot_annotation(
    title = "xx",
    theme = theme(
      plot.caption = element_text(hjust = 0, size = 10),
      plot.title = element_text(hjust = 0.5, size = 1)
    ),
    tag_levels = "I"
  ) &
  theme(plot.tag = element_text(size = 10))
patch
