Algoritma & Struktur Data

~ Ujian Akhir Semester ~


Kontak : \(\downarrow\)
Email
RPubs https://rpubs.com/kentzend03/


Soal

Berikut ini lampiran data perumahan di kota Melbourne Australia, dengan struktur data sebagai berikut:

Type: yang dibagi menjadi 3 bagian, yaitu TipeA.csv, TipeB.xlsx, dan TipeC.rds

Dimana setiap tipe rumah (A,B,C) tersebut terdiri dari beberapa variabel sebagai berikut:

  • Suburb: Suburb

  • Rooms: Number of rooms

  • Price: Price in dollars

  • Method: Status of property sale -

    • S - property sold;

    • SP - property sold prior;

    • PI - property passed in;

    • PN - sold prior not disclosed;

    • SN - sold not disclosed;

    • VB - vendor bid;

    • W - withdrawn prior to auction;

    • SA - sold after auction;

    • SS - sold after auction price not disclosed.

  • SellerG: Real Estate Agent

  • Date: Date sold

  • Distance: Distance from CBD

  • Bedroom2: Number of Bedrooms

  • Bathroom: Number of Bathrooms

  • Car: Number of car spots

  • Landsize: Land Size

  • YearBuilt: Year the house was built

  • Regionname: General Region (West, North West, North, North east …etc)

1. Gabungkan Data Tipe A, B, C ke dalam Satu Data Frame Sehingga Membentuk Struktur Data (Type, Subur, Price, Method, SellerG, Date, Distance, Bedroom2, Bathroom, Car, Landsize, YearBuilt, Regionname)

# Load readxl and writexl through pacman; read_excel() below needs readxl.
pacman::p_load(readxl, writexl)

# Read each housing type from its own file format.
data1 <- read.csv(file = "TipeA.csv", sep = ",")  # type A: comma-separated text
data2 <- read_excel(path = "TipeB.xlsx")          # type B: Excel workbook
data3 <- readRDS(file = "TipeC.rds")              # type C: serialized R object

# Tag every row with the housing type of the table it came from.
data1$Type <- "Cluster"
data2$Type <- "Kompleks"
data3$Type <- "Residence"

# Stack the three tables row-wise into a single data frame.
df <- rbind(data1, data2, data3)

# Print the combined data.
df
# Build `ok`: the combined data reduced to the thirteen requested columns,
# in the requested order. The `Suburb` column is stored under the name `Subur`.
ok <- with(
  df,
  data.frame(
    Type = Type,
    Subur = Suburb,
    Price = Price,
    Method = Method,
    SellerG = SellerG,
    Date = Date,
    Distance = Distance,
    Bedroom2 = Bedroom2,
    Bathroom = Bathroom,
    Car = Car,
    Landsize = Landsize,
    YearBuilt = YearBuilt,
    Regionname = Regionname
  )
)

# Print the resulting data frame.
ok

2. Rename Setiap Variabel di atas ke dalam Bahasa Indonesia

# Indonesian column names, listed in the same order as the columns of `ok`.
nama_kolom <- c(
  "Tipe", "Pinggiran Kota", "Harga", "Metode", "PenjualG", "Tanggal",
  "Jarak", "Kamartidur2", "Kamarmandi", "Mobil", "Luas", "Tahunbangun",
  "Daerah"
)

# `change` is a copy of `ok` carrying the Indonesian names; `ok` is unchanged.
change <- setNames(ok, nama_kolom)

# Print the renamed data.
change

3. Rename Kategori Variabel “Regionname” ke dalam Bahasa Indonesia.

# Translate the region names stored in `Daerah` into Indonesian.
# Names of this vector are the English values; elements are the replacements.
# (The eastern-Victoria entry is written without a trailing period so that it
# is spelled consistently with the other seven regions.)
region_translation <- c(
  "Southern Metropolitan"      = "Metropolitan Selatan",
  "Northern Metropolitan"      = "Metropolitan Utara",
  "Western Metropolitan"       = "Metropolitan Barat",
  "Eastern Metropolitan"       = "Metropolitan Timur",
  "South-Eastern Metropolitan" = "Metropolitan Tenggara",
  "Northern Victoria"          = "Victoria Utara",
  "Western Victoria"           = "Victoria Barat",
  "Eastern Victoria"           = "Victoria Timur"
)

# Only rows holding one of the known English names are rewritten; every other
# value (including NA) is left exactly as it is. `%in%` never yields NA, so the
# logical index is safe to use in the assignment.
region_now <- as.character(change$Daerah)
is_known <- region_now %in% names(region_translation)
change$Daerah[is_known] <- unname(region_translation[region_now[is_known]])

change  # print the data with translated region names

4. Lakukan Pengecekan Missing Value dan Pengendalian

# Missing-value check and handling.

# Handling: keep only the rows that have no missing value in any column.
t_NA <- na.omit(change)
t_NA  # print the complete rows

# Check: the rows of `change` that contain at least one missing value.
# complete.cases() is evaluated row by row, so this is the exact complement of
# `t_NA`. (Base setdiff() is not suitable here: on data frames it compares
# whole columns as list elements rather than rows.)
ada.NA <- change[!complete.cases(change), ]
ada.NA  # print the incomplete rows

# Per-column summary of `change`; numeric columns also report their NA count.
summary(change)
##      Tipe           Pinggiran Kota         Harga             Metode         
##  Length:34857       Length:34857       Min.   :   85000   Length:34857      
##  Class :character   Class :character   1st Qu.:  635000   Class :character  
##  Mode  :character   Mode  :character   Median :  870000   Mode  :character  
##                                        Mean   : 1050173                     
##                                        3rd Qu.: 1295000                     
##                                        Max.   :11200000                     
##                                        NA's   :7610                         
##    PenjualG           Tanggal              Jarak        Kamartidur2    
##  Length:34857       Length:34857       Min.   : 0.00   Min.   : 0.000  
##  Class :character   Class :character   1st Qu.: 6.40   1st Qu.: 2.000  
##  Mode  :character   Mode  :character   Median :10.30   Median : 3.000  
##                                        Mean   :11.18   Mean   : 3.085  
##                                        3rd Qu.:14.00   3rd Qu.: 4.000  
##                                        Max.   :48.10   Max.   :30.000  
##                                        NA's   :1       NA's   :8217    
##    Kamarmandi         Mobil             Luas           Tahunbangun   
##  Min.   : 0.000   Min.   : 0.000   Min.   :     0.0   Min.   :1196   
##  1st Qu.: 1.000   1st Qu.: 1.000   1st Qu.:   224.0   1st Qu.:1940   
##  Median : 2.000   Median : 2.000   Median :   521.0   Median :1970   
##  Mean   : 1.625   Mean   : 1.729   Mean   :   593.6   Mean   :1965   
##  3rd Qu.: 2.000   3rd Qu.: 2.000   3rd Qu.:   670.0   3rd Qu.:2000   
##  Max.   :12.000   Max.   :26.000   Max.   :433014.0   Max.   :2106   
##  NA's   :8226     NA's   :8728     NA's   :11810      NA's   :19306  
##     Daerah         
##  Length:34857      
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 

5. Buatlah Visualisasi Data secara Lengkap untuk Mendapatkan Informasi Penting yang Terkandung dalam Data_Perumahan_Melbourne.xlsx.

library(ggplot2)  # plotting package used for the charts

# Histogram of sale prices (`Harga`) for the rows without missing values.
# Bins are 500,000 wide. The former `bins = 10` argument was dropped because
# `binwidth` takes precedence over `bins`, so it never had any effect.
ggplot(t_NA, aes(Harga)) +
  geom_histogram(binwidth = 500000,
                 fill = "lightgrey",
                 color = "red",
                 size = 1) +
  # Label the x axis in millions of dollars at 1m-4m.
  scale_x_continuous(breaks = c(1000000, 2000000, 3000000, 4000000),
                     labels = c("$1m", "$2m", "$3m", "$4m")) +
  labs(title = "Price of Melbourne House")

library(ggplot2)  # plotting package used for the charts

# Box plot of price (`Harga`) for each housing type (`Tipe`).
# The x-axis categories are relabelled A, B and C; outliers are drawn yellow.
ggplot(t_NA, aes(x = Tipe, y = Harga)) +
  geom_boxplot(outlier.colour = "yellow") +
  scale_x_discrete(labels = c("A", "B", "C")) +
  scale_y_continuous(breaks = seq(from = 0, to = 10000000, by = 1500000)) +
  labs(x = "Tipe",
       y = "Harga",
       title = "Price Distribution of Home Type")

library(scales)
library(dplyr)

# Count the rows per region, sort the regions in descending name order, then
# derive each region's percentage share (one decimal) and the mid-point of its
# slice on the cumulative scale, which is where the text label is placed.
region_counts <- count(t_NA, Daerah)
region_counts <- arrange(region_counts, desc(Daerah))
plotdata <- mutate(region_counts,
                   prop = round(n * 100 / sum(n), 1),
                   lab.ypos = cumsum(prop) - 0.5 * prop)

# Pie chart: a single stacked bar drawn in polar coordinates, one slice per
# region, each slice labelled with its percentage.
ggplot(plotdata, aes(x = "", y = prop, fill = Daerah)) +
  geom_col(width = 1, color = "black") +
  coord_polar(theta = "y", start = 0) +
  geom_text(aes(y = lab.ypos, label = prop), color = "black") +
  scale_fill_brewer(palette = "Greens", direction = -1) +
  theme_void() +
  labs(title = "Total Population")

library(ggplot2)  # plotting package used for the charts

# Scatter plot of price (`Harga`) against land size (`Luas`, taken from the
# `Landsize` column) with a straight-line least-squares fit on top.
# Both axes are limited, so points outside 0-2500 land size or outside
# 50,000-5,000,000 price are not drawn.
ggplot(t_NA,
       aes(x = Luas,
           y = Harga)) +
  geom_point(color = "violet") +
  # The formula is spelled out; it is the same straight-line model that
  # method = "lm" would otherwise pick by default.
  geom_smooth(method = "lm", formula = y ~ x, color = "black") +
  # `labels` is written in full instead of the partial match `label`.
  scale_y_continuous(labels = scales::dollar,
                     limits = c(50000, 5000000)) +
  scale_x_continuous(breaks = seq(0, 2500, 200),
                     limits = c(0, 2500)) +
  theme_minimal() +
  # The axis shows land size, so it is labelled "Luas Tanah" (land area)
  # rather than "Luas Bangunan" (building area).
  labs(x = "Luas Tanah",
       y = "Harga",
       title = "Luas Tanah Vs. Harga",
       subtitle = "Harga dan Luas") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

library(ggplot2)  # plotting package used for the charts

# Scatter plot of price (`Harga`) against land size (`Luas`, taken from the
# `Landsize` column) with a second-degree polynomial least-squares fit on top.
# Both axes are limited, so points outside 0-2500 land size or outside
# 50,000-5,000,000 price are not drawn.
ggplot(t_NA,
       aes(x = Luas,
           y = Harga)) +
  geom_point(color = "red") +
  geom_smooth(method = "lm",
              formula = y ~ poly(x, 2),
              color = "yellow") +
  # `labels` is written in full instead of the partial match `label`.
  scale_y_continuous(labels = scales::dollar,
                     limits = c(50000, 5000000)) +
  scale_x_continuous(breaks = seq(0, 2500, 200),
                     limits = c(0, 2500)) +
  theme_minimal() +
  # The axis shows land size, so it is labelled "Luas Tanah" (land area)
  # rather than "Luas Bangunan" (building area).
  labs(x = "Luas Tanah",
       y = "Harga",
       title = "Luas Tanah Vs. Harga",
       subtitle = "Harga dan Luas") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

library(dplyr)    # data manipulation
library(ggplot2)  # plotting
library(scales)   # dollar() label formatting

# Mean price (`Harga`) for each region (`Daerah`).
plotdata <- t_NA %>%
  group_by(Daerah) %>%
  dplyr::summarize(mean_Harga = mean(Harga))

# One bar colour per region, recycled or truncated to the number of bars so the
# fill vector always matches the data.
mycols <- c("red", "orange", "yellow", "green", "darkblue", "lightblue",
            "lightgrey", "violet")

# Short axis label for each region, looked up BY NAME. Assigning labels by
# position (factor(..., labels = )) pairs them with the alphabetically sorted
# region names and therefore mislabels most bars.
# The eastern-Victoria name is accepted with or without a trailing period.
region_labels <- c(
  "Metropolitan Selatan"  = "Selatan",
  "Metropolitan Utara"    = "Utara",
  "Metropolitan Barat"    = "Barat",
  "Metropolitan Timur"    = "Timur",
  "Metropolitan Tenggara" = "Tenggara.Metropolitan",
  "Victoria Utara"        = "Utara.Victoria",
  "Victoria Barat"        = "Barat.Victoria",
  "Victoria Timur."       = "Timur.Victoria",
  "Victoria Timur"        = "Timur.Victoria"
)

# Bar chart of the mean price per region, each bar annotated with its value.
ggplot(plotdata,
       aes(x = Daerah,
           y = mean_Harga)) +
  geom_bar(stat = "identity",
           fill = rep_len(mycols, nrow(plotdata))) +
  geom_text(aes(label = dollar(mean_Harga)),
            vjust = -0.5) +
  # Regions without an entry in `region_labels` keep their original name.
  scale_x_discrete(labels = region_labels) +
  # `labels` is written in full instead of the partial match `label`.
  scale_y_continuous(breaks = seq(0, 40000000, 500000),
                     labels = dollar) +
  theme_minimal() +
  labs(title = "Mean Price by Region",
       x = "",
       y = "") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5))

