CANADA HOUSING DATASET

This dataset comes from Kaggle (https://www.kaggle.com/datasets/yuliiabulana/canada-housing). It contains property listings with features such as price, location, size, number of bedrooms and bathrooms, and other additional features.

The features used in this data exploration and visualization are described below:

Feature	Details
Price	Canadian currency (50,000 - 5,000,000)
Property Type	Single family, Condo, Townhome, Condo/Townhome, Duplex
Square Footage	(0 - 10000)
Province	AB (Alberta), BC (British Columbia), MB (Manitoba), NB (New Brunswick), NL (Newfoundland and Labrador), NS (Nova Scotia), NT (Northwest Territories), ON (Ontario), PE (Prince Edward Island), SK (Saskatchewan), YT (Yukon).

library(readxl)
library(ggplot2)
library(ggpubr)
library(scales)
library(ggridges)
# Read the Canada housing workbook into a tibble, then preview the first rows.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file location exists.
canada <- readxl::read_xlsx(
  path = "D:/STATISTIKA 23/KULIAH/MK SEMESTER 4/Kumpulan dataset/Canada Housing.xlsx"
)
head(canada)

## # A tibble: 6 × 23
##   City         Province Latitude Longitude   Price Bedrooms Bathrooms Acreage
##   <chr>        <chr>       <dbl>     <dbl>   <dbl>    <dbl>     <dbl>   <dbl>
## 1 Revelstoke   BC           51.0     -118.  839000        3         2    0   
## 2 Boswell      BC           49.5     -117. 1150000        3         2    0.32
## 3 West Kelowna BC           49.8     -120.  149000        2         1    0   
## 4 Kelowna      BC           49.8     -119. 1298000        5         4    0.69
## 5 Maple Ridge  BC           49.2     -123.  759900        3         2    0   
## 6 Calgary      AB           50.9     -114.  749800        4         3    0.07
## # ℹ 15 more variables: `Property Type` <chr>, `Square Footage` <dbl>,
## #   Garage <chr>, Parking <chr>, Basement <chr>, Exterior <chr>,
## #   Fireplace <chr>, Heating <chr>, Flooring <chr>, Roof <chr>,
## #   Waterfront <chr>, Sewer <chr>, Pool <chr>, Garden <chr>, Balcony <chr>

# Number of rows in the housing table.
nrow(canada)

## [1] 44101

# Dimensions of the housing table: rows first, then columns.
dim(canada)

## [1] 44101    23

# Per-column overview: five-number summary and mean for numeric columns,
# length/class/mode for character columns.
summary(canada)

##      City             Province            Latitude       Longitude      
##  Length:44101       Length:44101       Min.   :42.05   Min.   :-135.86  
##  Class :character   Class :character   1st Qu.:48.46   1st Qu.:-122.88  
##  Mode  :character   Mode  :character   Median :49.22   Median :-119.23  
##                                        Mean   :49.01   Mean   :-106.39  
##                                        3rd Qu.:49.99   3rd Qu.:-102.38  
##                                        Max.   :65.28   Max.   : -52.67  
##      Price            Bedrooms        Bathrooms         Acreage        
##  Min.   :  50000   Min.   : 0.000   Min.   : 0.000   Min.   :   0.000  
##  1st Qu.: 399900   1st Qu.: 2.000   1st Qu.: 2.000   1st Qu.:   0.000  
##  Median : 675000   Median : 3.000   Median : 2.000   Median :   0.050  
##  Mean   : 937983   Mean   : 3.179   Mean   : 2.467   Mean   :   2.457  
##  3rd Qu.:1175000   3rd Qu.: 4.000   3rd Qu.: 3.000   3rd Qu.:   0.170  
##  Max.   :5000000   Max.   :21.000   Max.   :23.000   Max.   :8600.000  
##  Property Type      Square Footage     Garage            Parking         
##  Length:44101       Min.   :  140   Length:44101       Length:44101      
##  Class :character   1st Qu.:  968   Class :character   Class :character  
##  Mode  :character   Median : 1400   Mode  :character   Mode  :character  
##                     Mean   : 1704                                        
##                     3rd Qu.: 2145                                        
##                     Max.   :10000                                        
##    Basement           Exterior          Fireplace           Heating         
##  Length:44101       Length:44101       Length:44101       Length:44101      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    Flooring             Roof            Waterfront           Sewer          
##  Length:44101       Length:44101       Length:44101       Length:44101      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      Pool              Garden            Balcony         
##  Length:44101       Length:44101       Length:44101      
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##

Histogram

# Histogram of Price with the default binning (30 bins).
# The hard x-axis limits make ggplot2 drop bars that fall outside the scale
# range; this is reported as a warning when the plot is drawn.
ggplot(canada, aes(x = Price)) +
  geom_histogram() +
  scale_x_continuous(
    labels = scales::comma,
    breaks = seq(from = 0, to = 5e6, by = 7e5),
    limits = c(0, 5e6)
  ) +
  ggtitle("Distribusi Harga Rumah") +
  xlab("Harga($CAD)") +
  ylab("Frekuensi")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

# Histogram of Price with 90 bins and black bar outlines.
# The hard x-axis limits make ggplot2 drop bars that fall outside the scale
# range; this is reported as a warning when the plot is drawn.
ggplot(canada, aes(x = Price)) +
  geom_histogram(colour = "black", bins = 90) +
  scale_x_continuous(
    labels = scales::comma,
    breaks = seq(from = 0, to = 5e6, by = 7e5),
    limits = c(0, 5e6)
  ) +
  ggtitle("Distribusi Harga Rumah") +
  xlab("Harga($CAD)") +
  ylab("Frekuensi")

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

# Histogram of Price with 90 red bins outlined in black; both axes use
# comma-formatted tick labels.
# The hard x-axis limits make ggplot2 drop bars that fall outside the scale
# range; this is reported as a warning when the plot is drawn.
ggplot(canada, aes(x = Price)) +
  geom_histogram(fill = "red", colour = "black", bins = 90) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(
    labels = scales::comma,
    breaks = seq(from = 0, to = 5e6, by = 7e5),
    limits = c(0, 5e6)
  ) +
  ggtitle("Distribusi Harga Rumah") +
  xlab("Harga($CAD)") +
  ylab("Frekuensi")

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

Density plot

# Kernel density estimate of Price.
# The y-axis of geom_density() is a density, not a count, so it is labelled
# "Densitas" rather than "Frekuensi".
ggplot(data = canada, mapping = aes(x = Price)) +
  geom_density() +
  labs(
    title = "Distribusi Harga Rumah",
    x = "Harga($CAD)",
    y = "Densitas"
  ) +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(
    limits = c(0, 5000000),
    breaks = seq(0, 5000000, by = 700000),
    labels = comma
  )

# Kernel density estimate of Price, drawn as a filled red area.
# The y-axis of geom_density() is a density, not a count, so it is labelled
# "Densitas" rather than "Frekuensi".
ggplot(data = canada, mapping = aes(x = Price)) +
  geom_density(fill = "red", alpha = 0.9) +
  labs(
    title = "Distribusi Harga Rumah",
    x = "Harga($CAD)",
    y = "Densitas"
  ) +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(
    limits = c(0, 5000000),
    breaks = seq(0, 5000000, by = 700000),
    labels = comma
  )

# Kernel density estimate of Price, one curve per Property Type.
# - The y-axis is a density, not a count, so it is labelled "Densitas".
# - The fills overlap, so a lower alpha keeps every group visible instead of
#   letting the last-drawn group hide the others.
ggplot(
  data = canada,
  mapping = aes(x = Price, fill = `Property Type`, color = `Property Type`)
) +
  geom_density(alpha = 0.4) +
  labs(
    title = "Distribusi Harga Rumah",
    x = "Harga($CAD)",
    y = "Densitas"
  ) +
  scale_x_continuous(
    limits = c(0, 5000000),
    breaks = seq(0, 5000000, by = 700000),
    labels = comma
  ) +
  scale_y_continuous(labels = comma)

# Ridgeline densities of Square Footage, one ridge (and fill colour) per
# Property Type.
ggplot(
  canada,
  aes(x = `Square Footage`, y = `Property Type`, fill = `Property Type`)
) +
  ggridges::geom_density_ridges() +
  scale_x_continuous(
    breaks = seq(from = 0, to = 1e4, by = 1e3),
    limits = c(0, 1e4)
  ) +
  ggtitle("Distribusi Luas Bangunan") +
  xlab("Square Footage") +
  ylab("Property Type")

## Picking joint bandwidth of 143

Box Plot

# Single horizontal boxplot of Price across all rows.
ggplot(canada, aes(x = Price)) +
  geom_boxplot() +
  ggtitle("Distribusi Harga Rumah") +
  scale_x_continuous(
    labels = scales::comma,
    breaks = seq(from = 0, to = 5e6, by = 7e5),
    limits = c(0, 5e6)
  )

# Boxplot of Price across all rows, flipped with coord_flip() so the price
# axis is drawn vertically.
ggplot(canada, aes(x = Price)) +
  geom_boxplot() +
  ggtitle("Distribusi Harga Rumah") +
  scale_x_continuous(
    labels = scales::comma,
    breaks = seq(from = 0, to = 5e6, by = 7e5),
    limits = c(0, 5e6)
  ) +
  coord_flip()

# Boxplots of Price, one box per Province.
ggplot(canada, aes(x = Price, y = Province)) +
  geom_boxplot() +
  ggtitle("Distribusi Harga Rumah") +
  scale_x_continuous(
    labels = scales::comma,
    breaks = seq(from = 0, to = 5e6, by = 7e5),
    limits = c(0, 5e6)
  )

# Boxplots of Square Footage, one box (and fill colour) per Province.
# The title names the plotted variable (building area); the previous title
# described house prices, which this plot does not show.
ggplot(
  data = canada,
  mapping = aes(x = `Square Footage`, y = Province, fill = Province)
) +
  geom_boxplot() +
  scale_x_continuous(
    limits = c(0, 10000),
    breaks = seq(0, 10000, by = 1000)
  ) +
  labs(title = "Distribusi Luas Bangunan")

# Boxplots of Square Footage per Province, with the redundant fill legend
# hidden (Province is already on the y-axis).
# - The title names the plotted variable (building area) instead of price.
# - Breaks stop at the upper axis limit; breaks beyond the limits are never
#   drawn.
ggplot(
  data = canada,
  mapping = aes(x = `Square Footage`, y = Province, fill = Province)
) +
  geom_boxplot() +
  scale_x_continuous(
    limits = c(0, 10000),
    breaks = seq(0, 10000, by = 2000)
  ) +
  theme(legend.position = "none") +
  labs(title = "Distribusi Luas Bangunan")

Violin Plot

# Violin plot of Price for the whole data set (constant y category "all"),
# with a narrow boxplot drawn on top of the violin.
ggplot(canada, aes(x = Price, y = "all")) +
  geom_violin(alpha = 0.9, fill = "green") +
  geom_boxplot(width = 0.2, fill = "blue") +
  ggtitle("Distribusi Harga Rumah") +
  scale_x_continuous(
    labels = scales::comma,
    breaks = seq(from = 0, to = 5e6, by = 7e5),
    limits = c(0, 5e6)
  )

QQ-Plot

# Q-Q plot of Price: sample quantiles as blue points plus a red reference
# line from stat_qq_line().
ggplot(canada, aes(sample = Price)) +
  stat_qq(colour = "blue", size = 0.9) +
  stat_qq_line(colour = "red", linewidth = 1) +
  ggtitle("Distribusi Harga Rumah") +
  scale_y_continuous(
    labels = scales::comma,
    breaks = seq(from = 0, to = 5e6, by = 7e5)
  )

Scatter Plot

# Scatter plot of Price (y) against Square Footage (x).
ggplot(canada, aes(x = `Square Footage`, y = Price)) +
  geom_point() +
  ggtitle("Distribusi Harga Rumah") +
  scale_x_continuous(breaks = seq(from = 0, to = 1e4, by = 1e3)) +
  scale_y_continuous(
    labels = scales::comma,
    breaks = seq(from = 0, to = 5e6, by = 7e5)
  )

# Scatter plot of Price (y) against Square Footage (x), with points coloured
# by Property Type.
ggplot(
  canada,
  aes(x = `Square Footage`, y = Price, colour = `Property Type`)
) +
  geom_point() +
  ggtitle("Distribusi Harga Rumah") +
  scale_x_continuous(breaks = seq(from = 0, to = 1e4, by = 1e3)) +
  scale_y_continuous(
    labels = scales::comma,
    breaks = seq(from = 0, to = 5e6, by = 7e5)
  )

ORACLE STOCK DATASET

Line Chart

# Read the Oracle stock workbook, add a `date` column holding the Date-class
# version of the date-time column `Date`, and preview the first rows.
# NOTE(review): the path is absolute and machine-specific.
oracle <- readxl::read_excel(
  path = "D:/STATISTIKA 23/KULIAH/MK SEMESTER 4/Kumpulan dataset/Oracle Stock Dataset.xlsx"
)
oracle[["date"]] <- as.Date(oracle[["Date"]])
head(oracle)

## # A tibble: 6 × 8
##   Date                `Adj Close` Close  High   Low  Open    Volume date      
##   <dttm>                    <dbl> <dbl> <dbl> <dbl> <dbl>     <dbl> <date>    
## 1 1996-01-02 00:00:00        2.59  3.20  3.21  3.14  3.17  45603000 1996-01-02
## 2 1996-01-03 00:00:00        2.47  3.06  3.21  3.06  3.15  55518750 1996-01-03
## 3 1996-01-04 00:00:00        2.44  3.02  3.08  2.94  3.08 129803850 1996-01-04
## 4 1996-01-05 00:00:00        2.57  3.18  3.19  2.94  2.97  83783700 1996-01-05
## 5 1996-01-08 00:00:00        2.53  3.13  3.23  3.12  3.22  12594150 1996-01-08
## 6 1996-01-09 00:00:00        2.41  2.98  3.14  2.93  3.14  96068700 1996-01-09

# Compact view of every column's type and first values.
str(oracle)

## tibble [7,260 × 8] (S3: tbl_df/tbl/data.frame)
##  $ Date     : POSIXct[1:7260], format: "1996-01-02" "1996-01-03" ...
##  $ Adj Close: num [1:7260] 2.59 2.47 2.44 2.57 2.53 ...
##  $ Close    : num [1:7260] 3.2 3.06 3.02 3.18 3.13 ...
##  $ High     : num [1:7260] 3.21 3.21 3.08 3.19 3.23 ...
##  $ Low      : num [1:7260] 3.14 3.06 2.94 2.94 3.12 ...
##  $ Open     : num [1:7260] 3.17 3.15 3.08 2.97 3.22 ...
##  $ Volume   : num [1:7260] 4.56e+07 5.55e+07 1.30e+08 8.38e+07 1.26e+07 ...
##  $ date     : Date[1:7260], format: "1996-01-02" "1996-01-03" ...

Line Chart Dasar

# Basic line chart of the Open column over Date.
ggplot(oracle, aes(x = Date, y = Open)) +
  geom_line() +
  ggtitle("Distribusi") +
  xlab("Tahun") +
  ylab("Harga Pada Saat Pembukaan Pasar")

Area Plot

# Area plot of the Open column over Date: a thick dark-blue line with a
# translucent blue area layered on top of it.
ggplot(oracle, aes(x = Date, y = Open)) +
  geom_line(colour = "darkblue", linewidth = 1.2) +
  geom_area(alpha = 0.3, fill = "blue") +
  ggtitle("Distribusi") +
  xlab("Tahun") +
  ylab("Harga Pada Saat Pembukaan Pasar")

Multiple Line Chart

# Open and Close columns drawn as two lines over Date.
# - Colour is mapped to a series name inside aes() so ggplot2 draws a legend;
#   with hard-coded colours the reader could not tell which line is which.
# - The y label covers both series instead of naming only the opening price.
ggplot(data = oracle, aes(x = Date)) +
  geom_line(aes(y = Open, colour = "Open"), lwd = 1.5) +
  geom_line(aes(y = Close, colour = "Close"), lwd = 1.5) +
  scale_colour_manual(
    name = NULL,
    breaks = c("Open", "Close"),
    values = c(Open = "red", Close = "lightblue")
  ) +
  labs(title = "Distribusi", x = "tanggal", y = "Harga Saham")

# Open and Close columns drawn as two lines over Date, with extra room on the
# right-hand side of the x-axis.
# - `Date` is POSIXct, so adding a bare 100 extends the axis by 100 seconds,
#   which is invisible on a multi-year axis. The padding is given as an
#   explicit difftime. NOTE(review): 100 days is assumed to be the intended
#   padding -- confirm.
# - Colour is mapped to a series name inside aes() so ggplot2 draws a legend.
# - The y label covers both series instead of naming only the opening price.
ggplot(data = oracle, aes(x = Date)) +
  geom_line(aes(y = Open, colour = "Open"), lwd = 1.2) +
  geom_line(aes(y = Close, colour = "Close"), lwd = 1.2) +
  scale_colour_manual(
    name = NULL,
    breaks = c("Open", "Close"),
    values = c(Open = "red", Close = "lightblue")
  ) +
  xlim(
    min(oracle$Date),
    max(oracle$Date) + as.difftime(100, units = "days")
  ) +
  labs(title = "Distribusi", x = "tanggal", y = "Harga Saham")

EKSPLORASI DAN VISUALISASI DATA (Visualisasi Data Numerik)

REGINA DWIRAHMA ALISYA

2025-03-14