Library

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.3.2

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.3.2

## Warning: package 'readr' was built under R version 4.3.2

## Warning: package 'dplyr' was built under R version 4.3.2

## Warning: package 'lubridate' was built under R version 4.3.2

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library("ggplot2")
library("sf")

## Warning: package 'sf' was built under R version 4.3.3

## Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE

library("rnaturalearth")

## Warning: package 'rnaturalearth' was built under R version 4.3.3

library("rnaturalearthdata")

## Warning: package 'rnaturalearthdata' was built under R version 4.3.3

## 
## Attaching package: 'rnaturalearthdata'
## 
## The following object is masked from 'package:rnaturalearth':
## 
##     countries110

library(sf)
library(ggspatial)

## Warning: package 'ggspatial' was built under R version 4.3.3

library(dplyr)
library(reshape2)

## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

library(ggcorrplot)

## Warning: package 'ggcorrplot' was built under R version 4.3.3

library(ggforce)

## Warning: package 'ggforce' was built under R version 4.3.3

library(readxl)

## Warning: package 'readxl' was built under R version 4.3.2

library(gridExtra)

## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

library(plotly)

## Warning: package 'plotly' was built under R version 4.3.2

## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

Data

# Load the provincial indicators workbook.
data <- read_excel("C:/Users/Muhammad Rizqa Salas/Downloads/anreg peubah y baru (1).xlsx")

# Drop the province column BEFORE the numeric coercion: it is removed from the
# analysis anyway, and running as.numeric() over it first only produced NAs
# and an "NAs introduced by coercion" warning (presumably because it holds
# text labels -- confirm no other column contains non-numeric cells).
data <- subset(data, select = -c(Provinsi))

# Coerce every remaining column to numeric; `data[] <-` replaces the columns
# in place so the tibble class and column names are kept.
data[] <- lapply(data, as.numeric)
data

## # A tibble: 34 × 10
##    `kendaraan pribadi`   IPM Penduduk Kemiskinan   RLS   PDRB Pengeluaran
##                  <dbl> <dbl>    <dbl>      <dbl> <dbl>  <dbl>       <dbl>
##  1               1.74   72.8  5407900     807000  9.44  38900        9963
##  2               5.04   72.7 15115200    1268000  9.71  62922       10848
##  3               1.78   73.3  5640600     335000  9.18  50264       11130
##  4               2.89   73.5  6614400     485000  8.22 151259       11158
##  5               1.77   72.1  2179800     279000  8.68  76164       10871
##  6               2.61   70.9  3631100    1045000  8.37  68237       11109
##  7               0.770  72.2  8657000     297000  8.91  43757       10840
##  8               2.74   70.4  1494600    1002000  8.18  44984       10336
##  9               0.819  72.2  2060100      67000  8.11  63872       13358
## 10               0.756  76.5  9176600     152000 10.4  145579       14469
## # ℹ 24 more rows
## # ℹ 3 more variables: Investasi <dbl>, Pengangguran <dbl>, UMR <dbl>

# Read the JNJ price table; column types are left to readr's guessing.
data2 <- read_csv(file = "C:/Users/Muhammad Rizqa Salas/Downloads/JNJ (1).csv")

## Rows: 1258 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Inspect the parsed column types and the first few values of each column.
utils::str(data2)

## spc_tbl_ [1,258 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Date     : Date[1:1258], format: "2019-04-17" "2019-04-18" ...
##  $ Open     : num [1:1258] 139 139 137 137 140 ...
##  $ High     : num [1:1258] 140 140 138 141 140 ...
##  $ Low      : num [1:1258] 138 136 136 137 139 ...
##  $ Close    : num [1:1258] 139 138 138 140 139 ...
##  $ Adj Close: num [1:1258] 121 120 120 122 121 ...
##  $ Volume   : num [1:1258] 9543700 10485300 4666500 5790500 5676300 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Date = col_date(format = ""),
##   ..   Open = col_double(),
##   ..   High = col_double(),
##   ..   Low = col_double(),
##   ..   Close = col_double(),
##   ..   `Adj Close` = col_double(),
##   ..   Volume = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

Peubah Numerik

Korelasi

# Scatter plots of IPM against four candidate predictors. Each panel adds a
# least-squares line (method = "lm") without a confidence band (se = FALSE).
# The response on every y axis is IPM, so all y labels name IPM.
plot_X1 <- ggplot(data, aes(x = UMR, y = IPM)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "UMR", y = "IPM (indeks)")

plot_X2 <- ggplot(data, aes(x = RLS, y = IPM)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "RLS", y = "IPM")

# The y label was previously misspelled as "IPKM".
plot_X3 <- ggplot(data, aes(x = Investasi, y = IPM)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Investasi", y = "IPM")

# The y label was previously truncated to "M".
plot_X4 <- ggplot(data, aes(x = PDRB, y = IPM)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "PDRB", y = "IPM")

# Combine the four scatter plots into a single two-row layout.
grid.arrange(plot_X1, plot_X2, plot_X3, plot_X4, nrow = 2)

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Terlihat bahwa UMR, Rata-rata Lama Sekolah, nilai investasi daerah, dan PDRB menunjukkan hubungan linear positif dengan IPM. Hal ini menandakan bahwa semakin tinggi nilai investasi dan pendapatan daerah, serta semakin tinggi UMR dan Rata-rata Lama Sekolah, nilai IPM cenderung meningkat.

# Label each row by whether its Investasi value reaches the 6000 cut-off.
data$kategori <- ifelse(
  test = data$Investasi >= 6000,
  yes = "Investasi tinggi",
  no = "Investasi rendah"
)

# IPM against PDRB, with points coloured by the investment category.
ggplot(data = data, mapping = aes(PDRB, IPM, colour = kategori)) +
  geom_point() +
  labs(
    title = "Scatter Plot IPM vs. Nilai PDRB",
    x = "Nilai PDRB",
    y = "IPM",
    colour = "Kategori"
  ) +
  theme_minimal()

Scatter plot tersebut menggambarkan bahwa wilayah-wilayah dengan investasi tinggi memiliki nilai IPM yang cenderung lebih tinggi dan nilai PDRB yang cenderung lebih besar dibandingkan dengan wilayah yang memiliki nilai investasi rendah. Dari plot tersebut juga dapat diindikasikan bahwa nilai investasi memiliki hubungan linear positif dengan IPM, yang berarti semakin tinggi nilai investasi maka nilai IPM juga cenderung meningkat.

Matrix Plot

# Keep only the numeric columns (this drops the character `kategori` label).
# select(where(...)) replaces the superseded scoped verb select_if().
data_numerik <- select(data, where(is.numeric))
str(data_numerik)

## tibble [34 × 10] (S3: tbl_df/tbl/data.frame)
##  $ kendaraan pribadi: num [1:34] 1.74 5.04 1.78 2.89 1.77 ...
##  $ IPM              : num [1:34] 72.8 72.7 73.3 73.5 72.1 ...
##  $ Penduduk         : num [1:34] 5407900 15115200 5640600 6614400 2179800 ...
##  $ Kemiskinan       : num [1:34] 807000 1268000 335000 485000 279000 ...
##  $ RLS              : num [1:34] 9.44 9.71 9.18 8.22 8.68 ...
##  $ PDRB             : num [1:34] 38900 62922 50264 151259 76164 ...
##  $ Pengeluaran      : num [1:34] 9963 10848 11130 11158 10871 ...
##  $ Investasi        : num [1:34] 4424 22789 2560 43062 8883 ...
##  $ Pengangguran     : num [1:34] 6.07 5.82 6.22 4.38 4.64 ...
##  $ UMR              : num [1:34] 3166460 2522610 2522610 2938564 2698941 ...

# Pairwise Pearson correlations of the numeric variables, reshaped to long
# format (Var1, Var2, value) so each matrix cell becomes one heatmap tile.
# data_numerik already contains numeric columns only, so no further column
# filtering is needed before cor().
data_melt <- melt(cor(data_numerik))

# Plain heatmap using ggplot2's default continuous fill scale.
ggplot(data_melt, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  labs(title = "Correlation Heatmap",
       x = "Variable 1",
       y = "Variable 2")

# Correlogram with a fixed [-1, 1] fill range centred on 0.
# NOTE(review): low/mid/high are all shades of blue, so the sign of a
# correlation is hard to read from the colour -- consider a neutral midpoint.
ggplot(data_melt, aes(Var1, Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "steelblue", mid = "blue", high = "darkblue", midpoint = 0, limits = c(-1,1), name="Korelasi") +
  labs(title = "Correlogram") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

# Correlation matrix of the numeric variables, rounded to one decimal place.
# It is passed to ggcorrplot() as is: calling cor() on it again would
# correlate the columns of the correlation matrix with each other, and the
# plots would then no longer show the variables' actual correlations.
data_corrl <- round(cor(data_numerik), 1)

# Default (square tile) correlogram.
ggcorrplot(data_corrl)

# Same matrix drawn with circles.
ggcorrplot(data_corrl, method = "circle")

Dari plot-plot di atas, dapat disimpulkan bahwa terdapat indikasi multikolinieritas antara beberapa variabel X, seperti antara nilai investasi dan banyaknya kendaraan pribadi. Korelasi yang cukup besar antara kedua variabel ini, yang ditunjukkan dengan warna yang lebih gelap atau lebih terang dalam plot, menandakan adanya hubungan yang kuat di antara keduanya. Oleh karena itu, dapat disimpulkan bahwa nilai investasi dan banyaknya kendaraan pribadi saling berkaitan erat, meskipun korelasi saja belum menunjukkan hubungan sebab-akibat.

Piecewise Constant

# Piecewise-constant (step) regression: Petal.Length is cut into three
# intervals and Sepal.Length gets one fitted level per interval.
mod_tangga <- lm(
  formula = Sepal.Length ~ cut(Petal.Length, 3),
  data = iris
)
summary(mod_tangga)

## 
## Call:
## lm(formula = Sepal.Length ~ cut(Petal.Length, 3), data = iris)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0111 -0.3111 -0.0060  0.2889  1.2261 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      5.00600    0.06913  72.409   <2e-16 ***
## cut(Petal.Length, 3)(2.97,4.93]  0.90511    0.09594   9.434   <2e-16 ***
## cut(Petal.Length, 3)(4.93,6.91]  1.66791    0.09987  16.700   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4889 on 147 degrees of freedom
## Multiple R-squared:  0.6562, Adjusted R-squared:  0.6515 
## F-statistic: 140.3 on 2 and 147 DF,  p-value: < 2.2e-16

# Iris scatter with the three-step piecewise-constant fit overlaid:
# the smoother refits lm() on cut(x, 3), i.e. one level per x interval.
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ cut(x, 3),
    linetype = 1,
    colour = "blue",
    se = FALSE # spelled out: `F` is an ordinary variable that can be reassigned
  ) +
  theme_bw()

# Iris scatter with a LOESS smoother (default confidence band kept).
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length)) +
  geom_point(color = "blue", size = 3, alpha = 0.6) +
  # Line thickness is set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0 and raised a lifecycle warning here.
  geom_smooth(method = "loess", color = "red", linetype = "dashed", linewidth = 1.5) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "LOESS Visualization of Petal Length vs. Sepal Length",
    subtitle = "Smoothed scatterplot with LOESS curve",
    caption = "Data Iris"
  ) +
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `geom_smooth()` using formula = 'y ~ x'

Time Series

Scatter Plot Time Series

# Drop the last row of the JNJ table before plotting.
data2 <- head(data2, n = -1)

# 1) Opening price over time, points only.
ggplot(data = data2, mapping = aes(Date, Open)) +
  geom_point() +
  labs(
    title = "Scatter Plot of Time Series Data (JNJ)",
    x = "Date",
    y = "Value (open)"
  )

# 2) Points with a connecting line drawn on top.
ggplot(data = data2, mapping = aes(Date, Open)) +
  geom_point() +
  geom_line() +
  labs(
    title = "Scatter Plot of Time Series Data (JNJ)",
    x = "Date",
    y = "Value (open)"
  )

# 3) Line only.
ggplot(data = data2, mapping = aes(Date, Open)) +
  geom_line() +
  labs(
    title = "Scatter Plot of Time Series Data (JNJ)",
    x = "Date",
    y = "Value (open)"
  )

# Add a trailing moving average of the `Open` column as a new column `ma`.
#
# data2:       data frame / tibble with a numeric `Open` column.
# window_size: number of consecutive observations averaged per window.
#
# Returns `data2` with an extra column `ma`. Row i holds the mean of `Open`
# over rows (i - window_size + 1) .. i; the first `window_size - 1` rows are
# NA because their window is incomplete.
calculate_moving_average <- function(data2, window_size) {
  # With align = "right" and fill = NA, rollmean() already returns a vector as
  # long as the input, NA-padded at the start. Padding it a second time would
  # shift the average `window_size - 1` rows into the future, so the result is
  # assigned directly.
  data2$ma <- zoo::rollmean(
    data2$Open,
    k = window_size,
    align = "right",
    fill = NA
  )
  data2
}

# Number of observations per moving-average window.
window_size <- 3
data2 <- calculate_moving_average(data2, window_size)

# Plot the opening price together with its moving average.
# Line thickness uses `linewidth`; `size` for lines is deprecated since
# ggplot2 3.4.0.
ggplot(data2, aes(x = Date)) +
  # NOTE(review): a white line is barely visible on theme_minimal()'s light
  # panel -- confirm that hiding the raw series is intended.
  geom_line(aes(y = Open), color = "white", linewidth = 1) +
  # Smoothed (moving-average) series
  geom_line(aes(y = ma), color = "steelblue", linetype = "dashed", linewidth = 1) +
  # Shaded area under the moving-average curve
  geom_ribbon(aes(ymin = -Inf, ymax = ma), fill = "steelblue", alpha = 0.2) +
  labs(title = paste("JNJ data with Moving Average (Window Size:", window_size, ")"),
       x = "Date",
       y = "Value (open)") +
  theme_minimal()

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Opening prices as a plain ts object (indexed 1..n, no calendar dates).
data_open <- data2$Open

data.ts <- ts(data_open)

# data2 is read from the JNJ file, so the labels name JNJ (they previously
# said "BBCA", which is not the series being plotted).
plot(data.ts, xlab = "Waktu", ylab = "Data Open JNJ", col = "green", main = "Plot Data Open JNJ")
# Overlay the individual observations on the line.
points(data.ts)

# Read the PG price table; column types are left to readr's guessing.
data1 <- read_csv(file = "C:/Users/Muhammad Rizqa Salas/Downloads/PG (2).csv")

## Rows: 1258 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Re-read the JNJ table from disk, replacing the copy that now carries the
# moving-average column.
data2 <- read_csv(file = "C:/Users/Muhammad Rizqa Salas/Downloads/JNJ (1).csv")

## Rows: 1258 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Drop the last row of each price table.
data1 <- head(data1, n = -1)
data2 <- head(data2, n = -1)

# Plot both opening-price series on shared axes. The constant strings inside
# aes() become legend keys, which scale_color_manual() maps to fixed colours.
ggplot() +
  geom_line(mapping = aes(x = Date, y = Open, color = "PG"), data = data1) +
  geom_line(mapping = aes(x = Date, y = Open, color = "JNJ"), data = data2) +
  labs(
    title = "Perbandingan Dua Data Time Series (PG vs. JNJ)",
    x = "Date",
    y = "Value",
    color = "Series"
  ) +
  scale_color_manual(values = c("PG" = "red", "JNJ" = "green")) +
  theme_minimal()

Grafik di atas mengilustrasikan perbandingan harga saham pembukaan (open) antara JNJ dan PG. Pada tahun 2020-2022 pola grafik keduanya hampir serupa, namun pada tahun 2023-2024 keduanya memiliki pola grafik yang berbeda. Saham JNJ pada tahun 2023-2024 memiliki pola yang cenderung menurun, sedangkan saham PG memiliki pola grafik yang cenderung naik pada interval waktu yang sama.

# Province boundary shapefile, read as an sf object.
my_sf <- read_sf(dsn = "C:/Users/Muhammad Rizqa Salas/Downloads/BATAS PROVINSI DESEMBER 2019 DUKCAPIL/BATAS PROVINSI DESEMBER 2019 DUKCAPIL/BATAS_PROVINSI_DESEMBER_2019_DUKCAPIL.shp")
# Per-province table with the `persentase` value to be mapped.
data3 <- read_xlsx(path = "C:/Users/Muhammad Rizqa Salas/Downloads/data kendaraan pribadi.xlsx")
data3

## # A tibble: 34 × 2
##    PROVINSI                  persentase
##    <chr>                          <dbl>
##  1 ACEH                           1.74 
##  2 SUMATERA UTARA                 5.04 
##  3 SUMATERA BARAT                 1.78 
##  4 RIAU                           2.89 
##  5 JAMBI                          1.77 
##  6 SUMATERA SELATAN               2.61 
##  7 BENGKULU                       0.770
##  8 LAMPUNG                        2.74 
##  9 KEPULAUAN BANGKA BELITUNG      0.819
## 10 KEPULAUAN RIAU                 0.756
## # ℹ 24 more rows

# Upper-case the province names so they can serve as the join key.
# NOTE(review): assumes the shapefile's PROVINSI values are upper case too --
# verify, since unmatched rows are kept by the full join below.
data3$PROVINSI <- toupper(data3$PROVINSI)

# Full outer join of the boundaries with the table (all = TRUE keeps
# unmatched rows from both sides).
Persen.Geo <- merge(my_sf, data3, by = "PROVINSI", all = TRUE)

# Choropleth: fill each province by `persentase`, light to dark green.
ggplot(data = Persen.Geo) +
  geom_sf(mapping = aes(fill = persentase), color = "#69b3a2") +
  scale_fill_gradient(low = "lightgreen", high = "darkgreen") +
  theme_void()

Tugas Visdat Praktikum 10

Muhammad Rizqa Salas

2024-05-14