Tugas UAS
Algoritma dan Struktur Data
| Kontak | : ↓ |
| Email | : garryjuliuspermana@gmail.com |
| RPubs | : https://rpubs.com/Garr |
| NIM | : 20204920003 |
soal
Berikut ini saya lampirkan data perumahan di kota Melbourne Australia, dengan struktur data sebagai berikut:
Type: yang dibagi menjadi 3 bagian TipeA.csv TipeB.xlsx TipeC.rds
Dimana setiap tipe rumah (A, B, C) tersebut terdiri dari beberapa variabel sebagai berikut:
- Suburb: Suburb
- Rooms: Number of rooms
- Price: Price in dollars
- Method: Status of property sale
  * S - property sold;
  * SP - property sold prior;
  * PI - property passed in;
  * PN - sold prior not disclosed;
  * SN - sold not disclosed;
  * VB - vendor bid;
  * W - withdrawn prior to auction;
  * SA - sold after auction;
  * SS - sold after auction price not disclosed.
- SellerG: Real Estate Agent
- Date: Date sold
- Distance: Distance from CBD
- Bedroom2: Number of Bedrooms
- Bathroom: Number of Bathrooms
- Car: Number of car spots
- Landsize: Land Size
- YearBuilt: Year the house was built
- Regionname: General Region (West, North West, North, North east …etc)
menggabungkan data
pacman::p_load(readxl, writexl)
# Read the three per-type source files; each one is stored in a different
# format (CSV, Excel workbook, RDS).
data1 <- read.csv("TipeA.csv")
data2 <- read_excel("TipeB.xlsx")
data3 <- readRDS("TipeC.rds")

# Label every row with its housing type so the origin of a row is still
# known after the tables are stacked.
data1[["Type"]] <- "A-Cluster"
data2[["Type"]] <- "B-Komplek"
data3[["Type"]] <- "C-Residence"

# Stack the three tables row-wise into one data frame.
df <- rbind(data1, data2, data3)
df
Gabungkan data tipe A, B, C ke dalam satu dataframe sehingga membentuk struktur data (Type, Subur, Price, Method, SellerG, Date, Distance, Bedroom2, Bathroom, Car, Landsize, YearBuilt, Regionname)
# Question 1 (Soal 1)
# Build a new data frame holding only the columns listed in the task, in
# the listed order. The source column `Suburb` is stored under the name
# `Subur`; every other column keeps its original name.
ndf <- data.frame(
  Type = df[["Type"]],
  Subur = df[["Suburb"]],
  Price = df[["Price"]],
  Method = df[["Method"]],
  SellerG = df[["SellerG"]],
  Date = df[["Date"]],
  Distance = df[["Distance"]],
  Bedroom2 = df[["Bedroom2"]],
  Bathroom = df[["Bathroom"]],
  Car = df[["Car"]],
  Landsize = df[["Landsize"]],
  YearBuilt = df[["YearBuilt"]],
  Regionname = df[["Regionname"]]
)
ndf
Rename setiap variabel di atas ke dalam bahasa Indonesia
# Question 2 (Soal 2)
# Copy `ndf` and give its columns Indonesian names. The names are applied
# positionally, so their order must follow the column order of `ndf`.
rename <- setNames(
  ndf,
  c(
    "Tipe",
    "Pinggiran Kota",
    "Harga",
    "Metode",
    "PenjualG",
    "Tanggal",
    "Jarak",
    "Kamartidur2",
    "Kamarmandi",
    "Mobil",
    "Luas",
    "Tahunbangun",
    "Daerah"
  )
)
rename
#wilayah = factor(c(rename$Daerah))
#library("dplyr")
# %>%
# group_by(rename$Daerah)%>%
# summarise(n_distinct(rename$Daerah))
#levels(wilayah) <- list(Utara.Met = "Northern Metropolitan",
# Selatan.Met = "Southern Metropolitan",
# Timur.Met = "Eastern Metropolitan",
# Barat.Met = "Western Metropolitan",
# Tenggara.Met = "South-Eastern Metropolitan",
# Utara.Vic = "Northern Victoria",
# Timur.Vic = "Eastern Victoria",
# Barat.Vic = "Western Victoria")
#rename$Daerah = wilayah
#rename
Rename kategori variabel “Regionname” ke dalam bahasa Indonesia.
# Question 3 (Soal 3)
# Translate the region names stored in `Daerah` to Indonesian using one
# lookup table (English name -> Indonesian name). Values that are not in
# the table, including NA, are left unchanged.
rename$Daerah <- local({
  peta_daerah <- c(
    "Southern Metropolitan" = "Selatan",
    "Northern Metropolitan" = "Utara",
    "Western Metropolitan" = "Barat",
    "Eastern Metropolitan" = "Timur",
    "South-Eastern Metropolitan" = "Tenggara.Metropolitan",
    "Northern Victoria" = "Utara.Victoria",
    "Western Victoria" = "Barat.Victoria",
    "Eastern Victoria" = "Timur.Victoria"
  )
  daerah <- rename$Daerah
  dikenal <- daerah %in% names(peta_daerah)
  # unname() so the lookup names are not carried over by the replacement.
  daerah[dikenal] <- unname(peta_daerah[daerah[dikenal]])
  daerah
})
#(Utara.Met = "Northern Metropolitan",
# Selatan.Met = "Southern Metropolitan",
# Timur.Met = "Eastern Metropolitan",
# Barat.Met = "Western Metropolitan",
# Tenggara.Met = "South-Eastern Metropolitan",
# Utara.Vic = "Northern Victoria",
# Timur.Vic = "Eastern Victoria",
# Barat.Vic = "Western Victoria")
rename
Lakukan pengecekan Missing Value dan pengendalian
# Question 4 (Soal 4)
# tNA: the rows of `rename` that have no missing value in any column.
tNA <- na.omit(rename)
tNA

# cekmv: the rows of `rename` that contain at least one missing value,
# i.e. exactly the rows na.omit() dropped above. complete.cases() is used
# because base::setdiff() on two data frames compares columns, not rows,
# and dplyr (which has a row-wise setdiff method) is not loaded at this
# point of the script.
cekmv <- rename[!complete.cases(rename), ]
cekmv

# Per-column overview; the NA's entries show how many values are missing.
summary(rename)
## Tipe Pinggiran Kota Harga Metode
## Length:34857 Length:34857 Min. : 85000 Length:34857
## Class :character Class :character 1st Qu.: 635000 Class :character
## Mode :character Mode :character Median : 870000 Mode :character
## Mean : 1050173
## 3rd Qu.: 1295000
## Max. :11200000
## NA's :7610
## PenjualG Tanggal Jarak Kamartidur2
## Length:34857 Length:34857 Min. : 0.00 Min. : 0.000
## Class :character Class :character 1st Qu.: 6.40 1st Qu.: 2.000
## Mode :character Mode :character Median :10.30 Median : 3.000
## Mean :11.18 Mean : 3.085
## 3rd Qu.:14.00 3rd Qu.: 4.000
## Max. :48.10 Max. :30.000
## NA's :1 NA's :8217
## Kamarmandi Mobil Luas Tahunbangun
## Min. : 0.000 Min. : 0.000 Min. : 0.0 Min. :1196
## 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.: 224.0 1st Qu.:1940
## Median : 2.000 Median : 2.000 Median : 521.0 Median :1970
## Mean : 1.625 Mean : 1.729 Mean : 593.6 Mean :1965
## 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 670.0 3rd Qu.:2000
## Max. :12.000 Max. :26.000 Max. :433014.0 Max. :2106
## NA's :8226 NA's :8728 NA's :11810 NA's :19306
## Daerah
## Length:34857
## Class :character
## Mode :character
##
##
##
##
# Rows of `rename` whose region (Daerah) is missing.
NA_Wilayah <- rename[is.na(rename[["Daerah"]]), ]
NA_Wilayah
Buatlah visualisasi data secara lengkap untuk mendapatkan informasi penting yang terkandung dalam Data_Perumahan_Melbourne.xlsx
# Question 5 (Soal 5)
library(ggplot2)

# Box plot of price (Harga) per housing type (Tipe), using the rows without
# missing values. The x-axis labels are named by the Tipe value they
# replace, so they do not depend on the alphabetical order of the levels.
ggplot(tNA, aes(Tipe, Harga)) +
  geom_boxplot(outlier.colour = "black") +
  scale_x_discrete(
    labels = c(
      "A-Cluster" = "CLuster",
      "B-Komplek" = "Komplek",
      "C-Residence" = "Residen"
    )
  ) +
  scale_y_continuous(breaks = seq(0, 10000000, 1000000)) +
  xlab("Tipe") +
  ylab("Harga") +
  ggtitle("distribusi harga bentuk rumah")

# Parse the sale date from day/month/year text into Date values.
# NOTE(review): this changes `rename` only; `tNA` was derived earlier and
# keeps the unparsed column - confirm that is intended.
rename$Tanggal <- as.Date(rename$Tanggal, format = "%d/%m/%Y")
rename

library(scales)
library(dplyr)

# Percentage of rows per region (Daerah) and the position of each slice
# label: the running total of the percentages minus half of the slice.
plotdata <- tNA %>%
  count(Daerah) %>%
  arrange(desc(Daerah)) %>%
  mutate(
    prop = round(n * 100 / sum(n), 1),
    lab.ypos = cumsum(prop) - 0.5 * prop
  )

# Pie chart: a single stacked bar drawn in polar coordinates.
# NOTE(review): the title says "penduduk" (residents) while the plotted
# values are row counts of `tNA` - confirm the wording.
ggplot(plotdata, aes(x = "", y = prop, fill = Daerah)) +
  geom_bar(width = 1, stat = "identity", color = "black") +
  coord_polar("y", start = 0) +
  geom_text(aes(y = lab.ypos, label = prop), color = "black") +
  scale_fill_brewer(palette = "Blues", direction = -1) +
  theme_void() +
  labs(title = "banyak penduduk berdasarkan wilayah")

library(ggplot2) # for visualization
# Scatter plot of price (Harga) against Luas with a straight-line fit.
# Both axes carry explicit limits (x: 0 to 2500, y: 50000 to 5000000).
ggplot(tNA, aes(x = Luas, y = Harga)) +
  geom_point(color = "cornflowerblue") +
  geom_smooth(method = "lm", color = "red") +
  scale_y_continuous(
    labels = scales::dollar,
    limits = c(50000, 5000000)
  ) +
  scale_x_continuous(
    breaks = seq(0, 2500, 200),
    limits = c(0, 2500)
  ) +
  theme_minimal() + # use the minimal theme
  labs(
    x = "Luas Bangunan",
    y = "Harga",
    title = "Luas Bangunan Vs. Harga",
    subtitle = "Harga dan Luas"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

library(ggplot2) # for visualization
# Scatter plot of price (Harga) against Luas with a second-degree
# polynomial fit. Both axes carry explicit limits (x: 0 to 2500,
# y: 50000 to 5000000).
ggplot(tNA, aes(x = Luas, y = Harga)) +
  geom_point(color = "cornflowerblue") +
  geom_smooth(
    method = "lm",
    formula = y ~ poly(x, 2),
    color = "red"
  ) +
  scale_y_continuous(
    labels = scales::dollar,
    limits = c(50000, 5000000)
  ) +
  scale_x_continuous(
    breaks = seq(0, 2500, 200),
    limits = c(0, 2500)
  ) +
  theme_minimal() + # use the minimal theme
  labs(
    x = "Luas Bangunan",
    y = "Harga",
    title = "Luas Bangunan Vs. Harga",
    subtitle = "Harga dan Luas"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

library(dplyr) # for data manipulation
library(ggplot2) # for visualization
library(scales) # automatic breaks and labels

# Average price (Harga) for every region (Daerah).
plotdata <- tNA %>%
  group_by(Daerah) %>%
  dplyr::summarize(mean_Harga = mean(Harga))

# Display order of the regions on the x axis. The values in `Daerah` are
# already the Indonesian names, so they are used as factor *levels*.
# Passing them as `labels` would attach them to the alphabetically sorted
# levels and put the wrong name under every bar.
urutan_daerah <- c(
  "Selatan",
  "Utara",
  "Barat",
  "Timur",
  "Tenggara.Metropolitan",
  "Utara.Victoria",
  "Barat.Victoria",
  "Timur.Victoria"
)

# One colour per region, matched by name in scale_fill_manual() so the plot
# still works when some regions are absent from `plotdata`.
mycols <- c("#CD534CFF", "#EFC000FF", "#2FDD92", "#FFAB4C",
            "#116530", "#7267CB", "#CD534CFF", "#EBE645")

# Bar chart of the average price per region, with the value above each bar.
ggplot(
  plotdata,
  aes(x = factor(Daerah, levels = urutan_daerah), y = mean_Harga)
) +
  geom_bar(aes(fill = Daerah), stat = "identity", show.legend = FALSE) +
  scale_fill_manual(values = setNames(mycols, urutan_daerah)) +
  geom_text(aes(label = dollar(mean_Harga)), vjust = -0.5) +
  scale_y_continuous(
    breaks = seq(0, 40000000, 500000),
    labels = dollar
  ) +
  theme_minimal() + # use the minimal theme
  labs(
    title = "Harga rata-rata rumah setiap daerah",
    x = "",
    y = ""
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5))