Homework 1
Alima Dzhanybaeva
Loading the dataset
# Read the serialized flats data from the working directory, then convert it
# to a data.table under the name every later task uses.
df <- readRDS('flats.rds')
df <- data.table(df)
Task 1
# Histogram of flat areas in bins 5 units wide; bar height is the number of
# flats per bin. `after_stat(count)` replaces the deprecated `..count..`
# notation, and the default "bin" stat no longer needs to be spelled out.
ggplot(df, aes(x = Area, y = after_stat(count))) +
  geom_histogram(fill = "#006D77", color = "white", binwidth = 5) +
  labs(title = "Distribution of the Area of flats (m2)") +
  theme_bw()

Task 2
# Drop flats whose Condition is missing before splitting the plot by condition.
df1 <- df %>%
  filter(!is.na(Condition))

# Overlaid, semi-transparent price densities, one per Condition level.
# The axis text is passed via the full `labels` argument name rather than
# relying on partial matching of `label`.
ggplot(df1, aes(x = Price, fill = Condition)) +
  geom_density(alpha = 0.3) +
  scale_x_continuous(
    breaks = c(0, 250000, 500000, 750000, 1000000),
    labels = c(
      "0 Ft", "250,000 Ft", "500,000 Ft", "750,000 Ft", "1,000,000 Ft"
    )
  ) +
  theme_bw() +
  theme(legend.position = "top") +
  # Keep all legend keys on a single row above the panel.
  guides(fill = guide_legend(nrow = 1, byrow = TRUE)) +
  labs(title = "Price distribution for flats in different conditions")

Task 3
# Price against area, coloured by Condition, with one straight-line (lm) fit
# per condition and no confidence ribbon. Uses `df1`, the data with missing
# Condition values removed.
ggplot(df1, aes(x = Area, y = Price, color = Condition)) +
  geom_point(alpha = 0.5, size = 2) +
  geom_smooth(method = "lm", se = FALSE) +
  # Full `labels` argument name instead of partially matched `label`,
  # consistent with the x scale below.
  scale_y_continuous(
    breaks = c(0, 250000, 500000, 750000, 1000000),
    labels = c(
      "0 Ft", "250,000 Ft", "500,000 Ft", "750,000 Ft", "1,000,000 Ft"
    )
  ) +
  scale_x_continuous(
    breaks = c(0, 50, 100, 150, 200),
    labels = c("0 m2", "50 m2", "100 m2", "150 m2", "200 m2")
  ) +
  theme_bw() +
  theme(legend.position = "bottom") +
  # Keep all legend keys on a single row below the panel.
  guides(color = guide_legend(nrow = 1, byrow = TRUE)) +
  labs(title = "How the condition of the flats effects price to area")
## `geom_smooth()` using formula = 'y ~ x'

Task 4
# Average price per district (one row per District).
df2 <- df %>%
  group_by(District) %>%
  summarise(Price = mean(Price))

# Bar chart of the pre-computed averages. `geom_col()` is the geom for values
# that are already aggregated; the original `geom_histogram(stat = "identity")`
# drew the same bars but misused the binning geom.
ggplot(df2, aes(x = District, y = Price)) +
  geom_col(fill = "#006D77", color = "white") +
  # District is plotted on a continuous axis, so force a tick at every
  # integer from 1 to 23.
  scale_x_continuous(breaks = seq(1, 23, 1)) +
  scale_y_continuous(
    breaks = c(0, 100000, 200000, 300000),
    labels = c("0 Ft", "100,000 Ft", "200,000 Ft", "300,000 Ft")
  ) +
  ylab("Average price") +
  theme_bw()

Task 5
# Violin plot of the price distribution within each district. District is
# converted to a factor so every district gets its own discrete violin.
ggplot(df, aes(x = as.factor(District), y = Price)) +
  geom_violin(color = "#006D77", fill = "#66B7B0", linewidth = 0.6) +
  # Full `labels` argument name instead of partially matched `label`.
  scale_y_continuous(
    breaks = c(0, 250000, 500000, 750000, 1000000),
    labels = c(
      "0 Ft", "250,000 Ft", "500,000 Ft", "750,000 Ft", "1,000,000 Ft"
    )
  ) +
  theme_bw() +
  xlab("District")

Task 6
# Per-district profile: mean and standard deviation of the listed flat
# attributes, named `<column>.mean` / `<column>.sd`.
#
# Changes from the earlier version:
#   * District is the grouping key, so its within-group mean just repeats the
#     key and its sd is always 0 (or NA for a one-row group); those two
#     (misspelled) summary columns are no longer produced.
#   * Every summary uses na.rm = TRUE, so a single missing value cannot turn
#     a whole district's statistic into NA.
df3 <- df %>%
  rename("Parking_fee_monthly" = "Parking_fee(monthly)") %>%
  group_by(District) %>%
  summarise(
    across(
      all_of(c(
        "Area", "Num_whole_rooms", "Num_half_rooms", "Price", "Floor",
        "Floors_in_bdg", "Overhead", "Parking_fee_monthly"
      )),
      list(
        mean = function(x) mean(x, na.rm = TRUE),
        sd = function(x) sd(x, na.rm = TRUE)
      ),
      .names = "{.col}.{.fn}"
    )
  )

# Classical MDS on the standardised profiles. The district id is only a label,
# so it is removed before scaling; otherwise the district number itself would
# count as a feature in the distances.
mds <- df3 %>%
  select(-District) %>%
  scale() %>%
  dist() %>%
  cmdscale() %>%
  as.data.frame()

# Label points with the actual district ids. Row names of the MDS result are
# just row positions and only match the ids by coincidence.
mds$District <- df3$District

ggplot(mds, aes(V1, V2, label = District)) +
  geom_label(size = 3.5) +
  theme_void() +
  ggtitle("Budapest districts") +
  theme(plot.title = element_text(hjust = 0.5))
