library(readxl)
## Warning: package 'readxl' was built under R version 4.3.2
library(tidyverse) #Include beberapa packages termasuk ggplot
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'forcats' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.3.2
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
data <- read_xlsx("D:/Semester 4/Visualisasi Data/Tugas_Visdat_UAS.xlsx")
str(data)
## tibble [136 × 10] (S3: tbl_df/tbl/data.frame)
##  $ region                      : chr [1:136] "Finland" "Denmark" "Iceland" "Israel" ...
##  $ Ladder_score                : num [1:136] 7804 7586 7530 7473 7403 ...
##  $ upperwhisker                : num [1:136] 7875 7667 7625 7535 7460 ...
##  $ lowerwhisker                : num [1:136] 7733 7506 7434 7411 7346 ...
##  $ Logged_GDP_percapita        : num [1:136] 10792 10962 10896 10639 10942 ...
##  $ Social_support              : num [1:136] 0.969 0.954 0.983 0.943 0.93 0.939 0.943 0.92 0.879 0.952 ...
##  $ Healthy_life_expectancy     : num [1:136] 71150 71250 72050 72697 71550 ...
##  $ Freedom_to_make_life-choices: num [1:136] 0.961 0.934 0.936 0.809 0.887 0.948 0.947 0.891 0.915 0.887 ...
##  $ Generosity                  : num [1:136] -0.019 0.134 0.211 -0.023 0.213 0.165 0.141 0.027 0.024 0.175 ...
##  $ Perceptions_of_corruption   : num [1:136] 0.182 0.196 0.668 0.708 0.379 0.202 0.283 0.266 0.345 0.271 ...

Visualisasi Numerik

Korelasi

ggplot(data, aes(x = Healthy_life_expectancy, y =Ladder_score)) +
  geom_point() +  
  labs(title = "Scatter Plot", x = "Healthy life expectancy", y = "Ladder score") +
  theme_classic()

ggplot(data, aes(x = Perceptions_of_corruption , y = Generosity)) +
  geom_point() +  
  labs(title = "Scatter Plot", x = "Perceptions_of_corruption", y = "Generosity") +
  theme_classic()

## Interpretasi

Scatter plot antara Ladder score dengan Healthy life expectancy memiliki hubungan yang linear positif artinya ketika Healthy life expectancy bertambah maka ladder score happiness juga akan cenderung naik. Sementara itu pada scatter plot kedua yaitu antara Perceptions of corruption dengan Generosity, plot plot amatan tidak terlihat linear sehingga kemungkinan hubungan antara Perceptions of corruption dengan generosity sangat lemah.

Matrix Plot

data_numerik <- select_if(data, is.numeric)
str(data_numerik)
## tibble [136 × 9] (S3: tbl_df/tbl/data.frame)
##  $ Ladder_score                : num [1:136] 7804 7586 7530 7473 7403 ...
##  $ upperwhisker                : num [1:136] 7875 7667 7625 7535 7460 ...
##  $ lowerwhisker                : num [1:136] 7733 7506 7434 7411 7346 ...
##  $ Logged_GDP_percapita        : num [1:136] 10792 10962 10896 10639 10942 ...
##  $ Social_support              : num [1:136] 0.969 0.954 0.983 0.943 0.93 0.939 0.943 0.92 0.879 0.952 ...
##  $ Healthy_life_expectancy     : num [1:136] 71150 71250 72050 72697 71550 ...
##  $ Freedom_to_make_life-choices: num [1:136] 0.961 0.934 0.936 0.809 0.887 0.948 0.947 0.891 0.915 0.887 ...
##  $ Generosity                  : num [1:136] -0.019 0.134 0.211 -0.023 0.213 0.165 0.141 0.027 0.024 0.175 ...
##  $ Perceptions_of_corruption   : num [1:136] 0.182 0.196 0.668 0.708 0.379 0.202 0.283 0.266 0.345 0.271 ...
data_melt <- cor(data_numerik[sapply(data_numerik,is.numeric)])

data_melt <- melt(data_melt) 
ggplot(data_melt, aes(Var1, Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "purple", mid = "white", high = "red", midpoint = 0, limits = c(-1,1), name="Korelasi") +
  labs(title = "Corellogram") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

## Interpretasi

Matriks korelasi diatas menunjukkan bahwa korelasi tertinggi dan positif ada pada ladder score dengan upperwhisker, ladder score dengan lowerwhisker, dan lowerwhisker dengan upperwhisker. Korelasi antara generosity dengan peubah lain cenderung sangat lemah atau tidak ada korelasi(mendekati nol) dan korelasi perceptions of corupstion dengan peubah lain cenderung memiliki nilai korelasi yang negatif namun tidak terlalu kuat. Selain itu, korelasi peubah lainn diluar generosity dengan perceptions of corupstion memiliki nilai yang positif dengan variasi ada yang korelasinya cukup kuat dan ada juga yang lemah.

Visualisasi Time Series

library(tidyverse) #Include beberapa packages termasuk ggplot
library(dplyr)
library(reshape2)
library(ggforce)
library(readxl)
data2 <- read_xlsx("D:/Semester 4/Visualisasi Data/Tugas_UAS_Visdat2.xlsx")
str(data2)
## tibble [259 × 7] (S3: tbl_df/tbl/data.frame)
##  $ Date     : POSIXct[1:259], format: "2023-04-20" "2023-04-21" ...
##  $ Open     : num [1:259] 2.73e+08 2.77e+08 2.79e+08 2.80e+08 2.78e+08 ...
##  $ High     : num [1:259] 2.77e+08 2.79e+08 2.81e+08 2.80e+08 2.79e+08 ...
##  $ Low      : num [1:259] 2.73e+08 2.75e+08 2.79e+08 2.77e+08 2.75e+08 ...
##  $ Close    : num [1:259] 2.76e+08 2.78e+08 2.80e+08 2.77e+08 2.75e+08 ...
##  $ Adj Close: num [1:259] 2.72e+08 2.74e+08 2.78e+08 2.75e+08 2.74e+08 ...
##  $ Volume   : num [1:259] 428800 324300 316000 249800 325100 ...

Scatter lot & Timeseries

data2$Date <- as.Date(data2$Date)

ggplot(data2, aes(x = Date, y = High)) +
  geom_point() +
  labs(title = "Scatter Plot of Time Series Data",
       x = "Date",
       y = "High Price") +
  scale_x_date(date_breaks = "3 months")

ggplot(data2, aes(x = Date, y = High)) +
  geom_point() +  # Add points
  geom_line() +   # Connect points with lines
  labs(title = "Scatter Plot Harga tertinggi ferari dan tesla",
       x = "Date",
       y = "High Price") +
  scale_x_date(date_breaks = "3 months")

ggplot(data2, aes(x = Date, y = High)) +
  geom_line() +
  labs(title = "Time Series Plot Harga Tertinggi Ferrari dan Tesla",
       x = "Date",
       y = "High Price") +
  scale_x_date(date_breaks = "3 months")

calculate_moving_average <- function(data, window_size) {
  ma_values <- zoo::rollmean(data2$High, k = window_size, align = "right", fill = NA)
  return(ma_values)
}
window_size <- 10
data2$ma <- calculate_moving_average(data2, window_size)

# Create the plot
ggplot(data2, aes(x = Date)) +
  geom_line(aes(y = High), color = "white", size = 1) +  
  geom_line(aes(y = ma), color = "purple", linetype = "dashed", size = 1) +  # emulusan
  geom_ribbon(aes(ymin = -Inf, ymax = ma), fill = "purple", alpha = 0.2) +  # Area under curve
  labs(title = paste("Time Series Data with Moving Average (Window Size:", window_size, ")"),
       x = "Date",
       y = "High Price") +
  theme_minimal() +
  scale_x_date(date_breaks = "3 months")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 9 rows containing missing values (`geom_line()`).

## Interpretasi

Berdasarkan grafik time series diatas, dapat diketahui bahwa terdapat fluktuasi harga tertinggi mobil tesla dan ferari dari rentang waktu bulan agustus 2023 sampai bulan april 2024, tetapi harga tertinggi tersebut tetap cenderung naik per tiga bukan sekali dalam rentang tahun 2023-2024.

Visualisasi Geospasial

library(ggplot2)            
library(tidyverse) 
data3 <- data[, c("region", "Ladder_score")]
data3
## # A tibble: 136 × 2
##    region      Ladder_score
##    <chr>              <dbl>
##  1 Finland             7804
##  2 Denmark             7586
##  3 Iceland             7530
##  4 Israel              7473
##  5 Netherlands         7403
##  6 Sweden              7395
##  7 Norway              7315
##  8 Switzerland         7240
##  9 Luxembourg          7228
## 10 New Zealand         7123
## # ℹ 126 more rows
dataworld <- map_data("world") 
data4 <- left_join(dataworld, data3, by="region")
data5 <- data4 %>% filter(!is.na(data4$Ladder_score))
ggplot(data4, aes( x = long, y = lat, group=group)) +
  geom_polygon(aes(fill = Ladder_score), color = "black")+ 
  scale_fill_gradient(name = "Ladder_score", low = "red", high =  "purple", na.value = "pink")+
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y=element_blank(),
        axis.title.x=element_blank(),
        rect = element_blank())

## Interpretasi

Berdasarkan grafik tersebut, semakin bewarna ungu maka score indeks kebahagiaan semakin tinggi, sedangkan jika semakin bewarna merah nilai indeks kebahagiaannya semakin rendah.Nilai score kebahagian paling rendah beranda di negara Afganistan yang ditunjukkan oleh warna merah. Kemudian negara dengan score tingkat kebahagiaan tertinggi yaitu negara Finland yang ditunjukkan oleh warna ungu tua. Sementara daerah yang bewarna pink menunjukkna pada daerah tersebut tidak diketahui nilai score kebahagiannya.