Praktikum 9 - Visualisasi Data

Febrian Adhitya Cahya Belardi

Pendahuluan

Package Installation

library(tidyverse) #Include beberapa packages termasuk ggplot
library(dplyr)
library(reshape2)
library(ggforce)
library(readxl)

Data

# Download the house-price CSV from the course repository and inspect
# the column types of the resulting data frame.
data <- read.csv(
  file = "https://raw.githubusercontent.com/gerrydito/Sains-Data-S2/master/Praktikum/Visualisasi%20Data/house_price.csv",
  sep = ",",
  header = TRUE
)
str(data)
## 'data.frame':    4600 obs. of  18 variables:
##  $ date         : chr  "2014-05-02 00:00:00" "2014-05-02 00:00:00" "2014-05-02 00:00:00" "2014-05-02 00:00:00" ...
##  $ price        : num  313000 2384000 342000 420000 550000 ...
##  $ bedrooms     : num  3 5 3 3 4 2 2 4 3 4 ...
##  $ bathrooms    : num  1.5 2.5 2 2.25 2.5 1 2 2.5 2.5 2 ...
##  $ sqft_living  : int  1340 3650 1930 2000 1940 880 1350 2710 2430 1520 ...
##  $ sqft_lot     : int  7912 9050 11947 8030 10500 6380 2560 35868 88426 6200 ...
##  $ floors       : num  1.5 2 1 1 1 1 1 2 1 1.5 ...
##  $ waterfront   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ view         : int  0 4 0 0 0 0 0 0 0 0 ...
##  $ condition    : int  3 5 4 4 4 3 3 3 4 3 ...
##  $ sqft_above   : int  1340 3370 1930 1000 1140 880 1350 2710 1570 1520 ...
##  $ sqft_basement: int  0 280 0 1000 800 0 0 0 860 0 ...
##  $ yr_built     : int  1955 1921 1966 1963 1976 1938 1976 1989 1985 1945 ...
##  $ yr_renovated : int  2005 0 0 0 1992 1994 0 0 0 2010 ...
##  $ street       : chr  "18810 Densmore Ave N" "709 W Blaine St" "26206-26214 143rd Ave SE" "857 170th Pl NE" ...
##  $ city         : chr  "Shoreline" "Seattle" "Kent" "Bellevue" ...
##  $ statezip     : chr  "WA 98133" "WA 98119" "WA 98042" "WA 98008" ...
##  $ country      : chr  "USA" "USA" "USA" "USA" ...

Visualisasi

# Simulate a Gaussian random walk.
#   n     : number of steps to draw
#   start : value added to every cumulative sum (default 0)
#   sd    : standard deviation of each zero-mean normal increment (default 1)
# Returns a numeric vector of length n.
generate_random_walk <- function(n, start = 0, sd = 1) {
  increments <- rnorm(n, mean = 0, sd = sd)
  cumsum(increments) + start
}

# Build a reproducible 600-point monthly series: the values come from a
# random walk starting at 100 with step sd 10, the dates start in January 2020.
set.seed(123)
values <- generate_random_walk(n = 600, start = 100, sd = 10)
dates <- seq(from = as.Date("2020-01-01"), by = "month", length.out = 600)
time_series_data <- data.frame(
  date = dates,
  value = values
)

Scatter Plot & Timeseries

# Scatter plot: one point per observation of the simulated series.
ggplot(data = time_series_data, mapping = aes(x = date, y = value)) +
  geom_point() +
  labs(
    title = "Scatter Plot of Time Series Data",
    x = "Date",
    y = "Value"
  )

# Same scatter plot, with consecutive observations joined by a line.
ggplot(data = time_series_data, mapping = aes(x = date, y = value)) +
  geom_point() + # individual observations
  geom_line() +  # path through the observations
  labs(
    title = "Scatter Plot with Connected Lines",
    x = "Date",
    y = "Value"
  )

# Line-only view of the random walk.
ggplot(data = time_series_data, mapping = aes(x = date, y = value)) +
  geom_line() +
  labs(
    title = "Random Walk Time Series Plot",
    x = "Date",
    y = "Value"
  )

# Right-aligned rolling mean of the `value` column.
#   data        : object with a `value` column
#   window_size : number of observations per window
# Positions without a full window are filled with NA (fill = NA).
calculate_moving_average <- function(data, window_size) {
  zoo::rollmean(
    x = data$value,
    k = window_size,
    fill = NA,
    align = "right"
  )
}
# Smooth the series with a 10-observation window and store it as column `ma`.
window_size <- 10
time_series_data[["ma"]] <- calculate_moving_average(
  data = time_series_data,
  window_size = window_size
)

# Plot the raw series together with its moving average and shade the
# area under the smoothed curve.
# The first `window_size - 1` entries of `ma` are NA because the rolling
# mean is right-aligned and padded with NA; they are dropped explicitly.
ggplot(time_series_data, aes(x = date)) +
  # NOTE(review): a white line is likely invisible on theme_minimal()'s
  # background -- confirm the colour is intended.
  geom_line(aes(y = value), color = "white", linewidth = 1) +
  # Smoothed series; `linewidth` replaces the `size` aesthetic that was
  # deprecated for lines in ggplot2 3.4.0.
  geom_line(aes(y = ma), color = "red", linetype = "dashed",
            linewidth = 1, na.rm = TRUE) +
  # Area under the smoothed curve
  geom_ribbon(aes(ymin = -Inf, ymax = ma), fill = "red", alpha = 0.2) +
  # paste0() avoids the stray space paste() inserted before ")"
  labs(title = paste0("Time Series Data with Moving Average (Window Size: ",
                      window_size, ")"),
       x = "Date",
       y = "Value") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Read the exchange-rate workbook and show its structure.
# NOTE(review): the path is absolute and machine-specific -- the script
# only runs where this file exists at exactly this location.
dataset <- read_excel(
  path = "/Users/user/Downloads/DATA KURS FIX MPDW.xlsx"
)
str(dataset)
## tibble [206 × 1] (S3: tbl_df/tbl/data.frame)
##  $ GBP: num [1:206] 20177 20118 20183 20208 20240 ...
# Pull out the GBP column (this re-uses the name `data`, replacing the
# house-price data frame loaded earlier) and turn it into a ts object.
data <- dataset$GBP

data.ts <- ts(data)

# Red line for the whole series, with a point marker on every observation.
plot(
  data.ts,
  main = "Plot Data Kurs Jual",
  xlab = "Waktu",
  ylab = "Data Kurs (Ribu)",
  col = "red"
)
points(data.ts)

# Split the series: observations 1-145 for training, 146-206 for testing.
# The test series keeps its original position by starting at 146.
data.train <- ts(data[seq_len(145)])
data.test <- ts(data[seq(146, 206)], start = 146)

# Time-series copies of both parts.
# NOTE(review): re-wrapping with ts() without `start` appears to reset the
# time index to 1 for testing.ts -- confirm that is intended.
training.ts <- ts(data.train)
testing.ts <- ts(data.test)

# Plot the full series, then overlay the training part in blue and the
# testing part in red, with a vertical line where the test set starts.
# The y label matches the exchange-rate plot of the same series; the
# original label referred to inflation.
ts.plot(data.ts, xlab = "Periode", ylab = "Data Kurs (Ribu)",
        main = "Plot Data Training dan Data Testing")
lines(data.train, col = "blue")
lines(data.test, col = "red")
# Keyword placement keeps the legend inside the plot whatever the data
# range; lty = 1 is the solid line type that lines() uses by default.
legend("topleft", legend = c("Data Training", "Data Testing"),
       lty = 1, col = c("blue", "red"), cex = 0.8)
abline(v = 146, col = "black", lty = 1, lwd = 1)

Waktu Jamak

# Simulate 12 monthly values for two locations as cumulative sums of
# normal increments, then stack them in long format (one row per
# date/location pair).
set.seed(123)
values_location1 <- cumsum(rnorm(12, mean = 5, sd = 2)) + 100
values_location2 <- cumsum(rnorm(12, mean = 3, sd = 1)) + 80
dates <- seq(from = as.Date("2020-01-01"), by = "month", length.out = 12)

time_series_data <- data.frame(
  date = rep(dates, times = 2),
  value = c(values_location1, values_location2),
  location = rep(c("Location 1", "Location 2"), each = 12)
)

# One row per location holding the last `value` of that location's rows.
last_values <- summarise(
  group_by(time_series_data, location),
  last_value = last(value)
)

# One line per location, distinguished by colour and line type, with a
# black text label placed at each line's last value.
ggplot(
  data = time_series_data,
  mapping = aes(x = date, y = value, color = location, linetype = location)
) +
  geom_line() +
  # x and y are fixed positions (latest date, last value per location),
  # not mapped aesthetics.
  geom_text(
    data = last_values,
    mapping = aes(label = location),
    x = max(time_series_data$date),
    y = last_values$last_value,
    hjust = -0.1,
    vjust = -0.5,
    size = 3,
    color = "black"
  ) +
  scale_color_manual(
    values = c("Location 1" = "blue", "Location 2" = "red")
  ) +
  scale_linetype_manual(
    values = c("Location 1" = "solid", "Location 2" = "dashed")
  ) +
  labs(
    title = "Time Series Data for Two Locations/Scenarios",
    x = "Date",
    y = "Value"
  ) +
  theme_minimal()

Dua Peubah

# Re-create the two simulated series with the same seed and keep each one
# in its own data frame.
set.seed(123)
values_location1 <- cumsum(rnorm(12, mean = 5, sd = 2)) + 100
values_location2 <- cumsum(rnorm(12, mean = 3, sd = 1)) + 80
dates <- seq(from = as.Date("2020-01-01"), by = "month", length.out = 12)

time_series_data_location1 <- data.frame(
  date = dates,
  value = values_location1
)
time_series_data_location2 <- data.frame(
  date = dates,
  value = values_location2
)

# Dual-axis plot: Location 1 is drawn on the primary y axis; Location 2 is
# drawn at twice its value, and the secondary axis divides by 2 so its
# labels show the original Location 2 values.
ggplot() +
  geom_line(
    data = time_series_data_location1,
    mapping = aes(x = date, y = value, color = "Location 1")
  ) +
  geom_line(
    data = time_series_data_location2,
    mapping = aes(x = date, y = value * 2, color = "Location 2")
  ) +
  scale_color_manual(values = c("blue", "red")) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . / 2, name = "Location 2 Value")
  ) +
  labs(
    title = "Time Series Data for Two Locations/Scenarios",
    x = "Date",
    y = "Location 1 Value",
    color = "Locations"
  ) +
  theme_minimal() +
  # Colour each axis title/text like the line it belongs to.
  theme(
    axis.title.y = element_text(color = "blue"),
    axis.title.y.right = element_text(color = "red"),
    axis.text.y.right = element_text(color = "red")
  )