Algoritma & Struktur Data
~ Ujian Akhir Semester ~
| Kontak | : \(\downarrow\) |
| Email | kent.zendrato@student.matanauniversity.ac.id |
| RPubs | https://rpubs.com/kentzend03/ |
Soal
Berikut ini lampiran data perumahan di kota Melbourne Australia, dengan struktur data sebagai berikut:
Type: tipe rumah, yang dibagi menjadi 3 berkas, yaitu TipeA.csv, TipeB.xlsx, dan TipeC.rds.
Dimana setiap tipe rumah (A,B,C) tersebut terdiri dari beberapa variabel sebagai berikut:
Suburb: Suburb
Rooms: Number of rooms
Price: Price in dollars
Method: Status of property sale -
S - property sold;
SP - property sold prior;
PI - property passed in;
PN - sold prior not disclosed;
SN - sold not disclosed;
VB - vendor bid;
W - withdrawn prior to auction;
SA - sold after auction;
SS - sold after auction price not disclosed.
SellerG: Real Estate Agent
Date: Date sold
Distance: Distance from CBD
Bedroom2: Number of Bedrooms
Bathroom: Number of Bathrooms
Car: Number of car spots
Landsize: Land Size
YearBuilt: Year the house was built
Regionname: General Region (West, North West, North, North east …etc)
1. Gabungkan Data Tipe A, B, dan C ke dalam Satu Data Frame Sehingga Membentuk Struktur Data (Type, Subur, Price, Method, SellerG, Date, Distance, Bedroom2, Bathroom, Car, Landsize, YearBuilt, Regionname).
# Load the packages used to read (readxl) and write (writexl) Excel workbooks.
pacman::p_load(readxl, writexl)

# Import the three housing files: a CSV file, an Excel workbook and an RDS file.
data1 <- read.csv("TipeA.csv")
data2 <- read_excel("TipeB.xlsx")
data3 <- readRDS("TipeC.rds")

# Tag every data set with its housing type in a new `Type` column.
data1[["Type"]] <- "Cluster"
data2[["Type"]] <- "Kompleks"
data3[["Type"]] <- "Residence"

# Stack the three data sets row-wise into a single data frame and display it.
df <- rbind(data1, data2, data3)
df
# Next: build a new data frame named `ok` from the combined data.
# Build `ok` from the combined data `df`, keeping only the columns required by
# the exam question, in the requested order. `Suburb` is stored as `Subur`.
ok <- data.frame(
  Type = df[["Type"]],
  Subur = df[["Suburb"]],
  Price = df[["Price"]],
  Method = df[["Method"]],
  SellerG = df[["SellerG"]],
  Date = df[["Date"]],
  Distance = df[["Distance"]],
  Bedroom2 = df[["Bedroom2"]],
  Bathroom = df[["Bathroom"]],
  Car = df[["Car"]],
  Landsize = df[["Landsize"]],
  YearBuilt = df[["YearBuilt"]],
  Regionname = df[["Regionname"]]
)
ok # display the data frame
# 2. Rename each of the variables above into Indonesian
# Work on a copy of `ok` named `change`, so `ok` keeps its English column names.
change <- ok

# Replace all column names with their Indonesian equivalents. The order must
# match the column order of `ok` (Type, Subur, Price, ..., Regionname).
names(change) <- c(
  "Tipe",
  "Pinggiran Kota",
  "Harga",
  "Metode",
  "PenjualG",
  "Tanggal",
  "Jarak",
  "Kamartidur2",
  "Kamarmandi",
  "Mobil",
  "Luas",
  "Tahunbangun",
  "Daerah"
)
change # display the renamed data frame

# 3. Rename the categories of the "Regionname" variable into Indonesian.
# Translate the region categories stored in `Daerah` (originally "Regionname")
# into Indonesian. Names are the original English values, values the
# translations. The stray trailing period on "Victoria Timur." was removed so
# all eight categories follow the same pattern.
nama_daerah <- c(
  "Southern Metropolitan" = "Metropolitan Selatan",
  "Northern Metropolitan" = "Metropolitan Utara",
  "Western Metropolitan" = "Metropolitan Barat",
  "Eastern Metropolitan" = "Metropolitan Timur",
  "South-Eastern Metropolitan" = "Metropolitan Tenggara",
  "Northern Victoria" = "Victoria Utara",
  "Western Victoria" = "Victoria Barat",
  "Eastern Victoria" = "Victoria Timur"
)

# Only rows whose value appears in the lookup table are rewritten; any other
# value (including NA) is left exactly as it is.
cocok <- change$Daerah %in% names(nama_daerah)
change$Daerah[cocok] <- unname(nama_daerah[as.character(change$Daerah[cocok])])
change # display the data
# 4. Check for missing values and handle them
# Check all missing values.

# `t_NA` keeps only the complete rows: every row of `change` that has at least
# one missing value is dropped. The charts further down are drawn from `t_NA`.
t_NA <- na.omit(change)
t_NA # display the complete rows

# `ada.NA` holds the opposite selection: the rows of `change` that contain at
# least one missing value. complete.cases() flags the rows without any NA, so
# negating it selects exactly the rows removed by na.omit() above.
ada.NA <- change[!complete.cases(change), ]
ada.NA # display the rows with missing values

# Summarise every column of `change`; for numeric columns the number of NA's
# is part of the summary.
summary(change)
## Tipe Pinggiran Kota Harga Metode
## Length:34857 Length:34857 Min. : 85000 Length:34857
## Class :character Class :character 1st Qu.: 635000 Class :character
## Mode :character Mode :character Median : 870000 Mode :character
## Mean : 1050173
## 3rd Qu.: 1295000
## Max. :11200000
## NA's :7610
## PenjualG Tanggal Jarak Kamartidur2
## Length:34857 Length:34857 Min. : 0.00 Min. : 0.000
## Class :character Class :character 1st Qu.: 6.40 1st Qu.: 2.000
## Mode :character Mode :character Median :10.30 Median : 3.000
## Mean :11.18 Mean : 3.085
## 3rd Qu.:14.00 3rd Qu.: 4.000
## Max. :48.10 Max. :30.000
## NA's :1 NA's :8217
## Kamarmandi Mobil Luas Tahunbangun
## Min. : 0.000 Min. : 0.000 Min. : 0.0 Min. :1196
## 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.: 224.0 1st Qu.:1940
## Median : 2.000 Median : 2.000 Median : 521.0 Median :1970
## Mean : 1.625 Mean : 1.729 Mean : 593.6 Mean :1965
## 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 670.0 3rd Qu.:2000
## Max. :12.000 Max. :26.000 Max. :433014.0 Max. :2106
## NA's :8226 NA's :8728 NA's :11810 NA's :19306
## Daerah
## Length:34857
## Class :character
## Mode :character
##
##
##
##
5. Buatlah Visualisasi Data secara Lengkap untuk Mendapatkan Informasi Penting yang Terkandung dalam Data_Perumahan_Melbourne.xlsx.
library(ggplot2) # plotting package used for the data visualisations

# Histogram of the sale price (`Harga`) over the complete rows in `t_NA`,
# using bins that are 500,000 wide. The former `bins = 10` argument was
# dropped because an explicit `binwidth` takes precedence over `bins`.
# NOTE(review): on recent ggplot2 releases the outline width of bars is set
# with `linewidth` instead of `size` -- confirm the installed version.
ggplot(t_NA, aes(Harga)) +
  geom_histogram(
    binwidth = 500000,
    fill = "lightgrey",
    color = "red",
    size = 1
  ) +
  scale_x_continuous(
    breaks = c(1000000, 2000000, 3000000, 4000000),
    labels = c("$1m", "$2m", "$3m", "$4m")
  ) +
  labs(title = "Price of Melbourne House")

# This call was fused onto the end of the labs() line above, which made the
# statement unparseable; it now stands on its own line.
library(ggplot2)
# Box plot of the sale price (`Harga`) for each housing type (`Tipe`).
# The x labels are positional: the type values "Cluster", "Kompleks" and
# "Residence" are shown as A, B and C respectively, with outliers in yellow.
ggplot(t_NA, aes(Tipe, Harga)) +
  geom_boxplot(outlier.colour = "yellow") +
  scale_x_discrete(labels = c("A", "B", "C")) +
  scale_y_continuous(breaks = seq(0, 10000000, 1500000)) +
  xlab("Tipe") +
  ylab("Harga") +
  ggtitle("Price Distribution of Home Type")

# This call was fused onto the end of the ggtitle() line above, which made the
# statement unparseable; it now stands on its own line.
library(scales)
library(dplyr) # data manipulation verbs and the %>% pipe

# Percentage of rows per region (`Daerah`), rounded to one decimal.
# `lab.ypos` is the midpoint of each slice along the stacked axis and is used
# to place the percentage label inside the slice.
plotdata <- t_NA %>%
  count(Daerah) %>%
  arrange(desc(Daerah)) %>%
  mutate(
    prop = round(n * 100 / sum(n), 1),
    lab.ypos = cumsum(prop) - 0.5 * prop
  )

# Pie chart: a single stacked bar drawn in polar coordinates. The title used
# to read "Total Population", a leftover from another example; the chart
# shows the share of houses per region.
ggplot(plotdata, aes(x = "", y = prop, fill = Daerah)) +
  geom_bar(width = 1, stat = "identity", color = "black") +
  coord_polar("y", start = 0) +
  geom_text(aes(y = lab.ypos, label = prop), color = "black") +
  scale_fill_brewer(palette = "Greens", direction = -1) +
  theme_void() +
  labs(title = "Proportion of Houses by Region (%)")

# This call was fused onto the end of the labs() line above, which made the
# statement unparseable; it now stands on its own line.
library(ggplot2) # plotting
# Scatter plot of land size (`Luas`, originally Landsize) against sale price
# (`Harga`) with a straight-line fit. Points outside the axis limits
# (price 50,000-5,000,000; land size 0-2,500) are excluded from the plot.
# Fixes: `labels` is spelled out instead of the partial name `label`, and the
# axis text says "Luas Tanah" (land size) because `Luas` holds Landsize, not
# the building area.
ggplot(t_NA, aes(x = Luas, y = Harga)) +
  geom_point(color = "violet") +
  geom_smooth(method = "lm", color = "black") +
  scale_y_continuous(
    labels = scales::dollar,
    limits = c(50000, 5000000)
  ) +
  scale_x_continuous(
    breaks = seq(0, 2500, 200),
    limits = c(0, 2500)
  ) +
  theme_minimal() +
  labs(
    x = "Luas Tanah",
    y = "Harga",
    title = "Luas Tanah Vs. Harga",
    subtitle = "Harga dan Luas"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# This call was fused onto the end of the theme() line above, which made the
# statement unparseable; it now stands on its own line.
library(ggplot2) # plotting
# Scatter plot of land size (`Luas`, originally Landsize) against sale price
# (`Harga`) with a quadratic fit (second-degree polynomial in x). Points
# outside the axis limits (price 50,000-5,000,000; land size 0-2,500) are
# excluded from the plot.
# Fixes: `labels` is spelled out instead of the partial name `label`, and the
# axis text says "Luas Tanah" (land size) because `Luas` holds Landsize, not
# the building area.
ggplot(t_NA, aes(x = Luas, y = Harga)) +
  geom_point(color = "red") +
  geom_smooth(
    method = "lm",
    formula = y ~ poly(x, 2),
    color = "yellow"
  ) +
  scale_y_continuous(
    labels = scales::dollar,
    limits = c(50000, 5000000)
  ) +
  scale_x_continuous(
    breaks = seq(0, 2500, 200),
    limits = c(0, 2500)
  ) +
  theme_minimal() +
  labs(
    x = "Luas Tanah",
    y = "Harga",
    title = "Luas Tanah Vs. Harga",
    subtitle = "Harga dan Luas"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# This call was fused onto the end of the theme() line above, which made the
# statement unparseable; it now stands on its own line.
library(dplyr) # data manipulation
library(ggplot2) # plotting
library(scales) # dollar() label formatter

# Mean sale price (`Harga`) for each region (`Daerah`).
plotdata <- t_NA %>%
  group_by(Daerah) %>%
  dplyr::summarize(mean_Harga = mean(Harga))

# Short axis label for each region, keyed by the full region name.
# The previous code used factor(Daerah, labels = ...), which hands out the
# labels in alphabetical order of the region names, so e.g.
# "Metropolitan Barat" was drawn under the label "Selatan". An explicit
# name -> label mapping keeps every bar under its own region.
label_daerah <- c(
  "Metropolitan Selatan" = "Selatan",
  "Metropolitan Utara" = "Utara",
  "Metropolitan Barat" = "Barat",
  "Metropolitan Timur" = "Timur",
  "Metropolitan Tenggara" = "Tenggara.Metropolitan",
  "Victoria Utara" = "Utara.Victoria",
  "Victoria Barat" = "Barat.Victoria",
  "Victoria Timur" = "Timur.Victoria"
)

# A trailing period is stripped before the lookup so that the spelling
# "Victoria Timur." is matched as well.
kunci <- sub("\\.$", "", plotdata$Daerah)
plotdata$Wilayah <- factor(
  kunci,
  levels = names(label_daerah),
  labels = label_daerah
)

# Sort the rows in bar order so the colours below run left to right, and drop
# labels of regions that do not occur in the data.
plotdata <- droplevels(plotdata[order(plotdata$Wilayah), ])

# One fill colour per bar.
mycols <- c(
  "red", "orange", "yellow", "green",
  "darkblue", "lightblue", "lightgrey", "violet"
)

# Bar chart of the mean price per region with the value printed above each
# bar. The title used to read "Mean Salary by Rank", a leftover from another
# example.
ggplot(plotdata, aes(x = Wilayah, y = mean_Harga)) +
  geom_bar(
    stat = "identity",
    fill = rep_len(mycols, nrow(plotdata))
  ) +
  geom_text(aes(label = dollar(mean_Harga)), vjust = -0.5) +
  scale_y_continuous(
    breaks = seq(0, 40000000, 500000),
    labels = dollar
  ) +
  theme_minimal() +
  labs(
    title = "Mean Price by Region",
    x = "",
    y = ""
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5))