library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("ggplot2")
library("sf")
## Warning: package 'sf' was built under R version 4.3.3
## Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE
library("rnaturalearth")
## Warning: package 'rnaturalearth' was built under R version 4.3.3
library("rnaturalearthdata")
## Warning: package 'rnaturalearthdata' was built under R version 4.3.3
##
## Attaching package: 'rnaturalearthdata'
##
## The following object is masked from 'package:rnaturalearth':
##
## countries110
library(sf)
library(ggspatial)
## Warning: package 'ggspatial' was built under R version 4.3.3
library(dplyr)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.3.3
library(ggforce)
## Warning: package 'ggforce' was built under R version 4.3.3
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.2
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.2
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Load the provincial indicators workbook.
data <- read_excel("C:/Users/Muhammad Rizqa Salas/Downloads/anreg peubah y baru (1).xlsx")
# Drop the province identifier BEFORE the numeric coercion. Coercing first
# turned that column into NAs and raised "NAs introduced by coercion", which
# could hide a genuine parsing problem in one of the indicator columns.
# NOTE(review): assumes `Provinsi` is the only non-numeric column -- confirm.
data <- subset(data, select = -c(Provinsi))
# Force every remaining column to numeric; `data[] <-` keeps the tibble class.
data[] <- lapply(data, as.numeric)
data
## # A tibble: 34 × 10
## `kendaraan pribadi` IPM Penduduk Kemiskinan RLS PDRB Pengeluaran
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1.74 72.8 5407900 807000 9.44 38900 9963
## 2 5.04 72.7 15115200 1268000 9.71 62922 10848
## 3 1.78 73.3 5640600 335000 9.18 50264 11130
## 4 2.89 73.5 6614400 485000 8.22 151259 11158
## 5 1.77 72.1 2179800 279000 8.68 76164 10871
## 6 2.61 70.9 3631100 1045000 8.37 68237 11109
## 7 0.770 72.2 8657000 297000 8.91 43757 10840
## 8 2.74 70.4 1494600 1002000 8.18 44984 10336
## 9 0.819 72.2 2060100 67000 8.11 63872 13358
## 10 0.756 76.5 9176600 152000 10.4 145579 14469
## # ℹ 24 more rows
## # ℹ 3 more variables: Investasi <dbl>, Pengangguran <dbl>, UMR <dbl>
# Read the JNJ price history (Date, Open, High, Low, Close, Adj Close, Volume).
data2 <- readr::read_csv(
  file = "C:/Users/Muhammad Rizqa Salas/Downloads/JNJ (1).csv"
)
## Rows: 1258 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the column types that readr guessed for the JNJ data.
utils::str(data2)
## spc_tbl_ [1,258 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Date : Date[1:1258], format: "2019-04-17" "2019-04-18" ...
## $ Open : num [1:1258] 139 139 137 137 140 ...
## $ High : num [1:1258] 140 140 138 141 140 ...
## $ Low : num [1:1258] 138 136 136 137 139 ...
## $ Close : num [1:1258] 139 138 138 140 139 ...
## $ Adj Close: num [1:1258] 121 120 120 122 121 ...
## $ Volume : num [1:1258] 9543700 10485300 4666500 5790500 5676300 ...
## - attr(*, "spec")=
## .. cols(
## .. Date = col_date(format = ""),
## .. Open = col_double(),
## .. High = col_double(),
## .. Low = col_double(),
## .. Close = col_double(),
## .. `Adj Close` = col_double(),
## .. Volume = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Scatter plot of IPM against one predictor, with a least-squares line and no
# confidence band.
#   df:    data frame holding `IPM` and the predictor column.
#   x_var: name of the predictor column, as a string; also used as the x label.
# Returns a ggplot object.
plot_ipm_vs <- function(df, x_var) {
  ggplot(df, aes(x = .data[[x_var]], y = IPM)) +
    geom_point() +
    geom_smooth(method = "lm", se = FALSE) +
    # Every panel shows the same response, so the y label is always "IPM"
    # (the previous "IPKM" and "M" labels were typos).
    labs(x = x_var, y = "IPM")
}

plot_X1 <- plot_ipm_vs(data, "UMR")
plot_X2 <- plot_ipm_vs(data, "RLS")
plot_X3 <- plot_ipm_vs(data, "Investasi")
plot_X4 <- plot_ipm_vs(data, "PDRB")
# Combine the four scatter plots into a single two-row layout.
grid.arrange(plot_X1, plot_X2, plot_X3, plot_X4, nrow = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Terlihat bahwa UMR, Rata-rata Lama Sekolah, nilai investasi daerah, dan PDRB menunjukkan hubungan linear positif dengan IPM, yang menandakan bahwa semakin tinggi nilai investasi dan pendapatan daerah, serta semakin tinggi UMR dan Rata-rata Lama Sekolah, maka nilai IPM cenderung meningkat.
# Label each row by investment level: 6000 and above is "Investasi tinggi",
# anything lower is "Investasi rendah".
data <- dplyr::mutate(
  data,
  kategori = ifelse(Investasi >= 6000, "Investasi tinggi", "Investasi rendah")
)
# IPM against PDRB, with the points coloured by investment category.
ggplot(data = data, mapping = aes(x = PDRB, y = IPM, colour = kategori)) +
  geom_point() +
  theme_minimal() +
  labs(
    title = "Scatter Plot IPM vs. Nilai PDRB",
    x = "Nilai PDRB",
    y = "IPM",
    colour = "Kategori"
  )
Scatter plot tersebut menggambarkan bahwa wilayah-wilayah dengan
Investasi tinggi memiliki nilai IPM yang cenderung lebih tinggi dan
memiliki nilai PDRB yang cenderung lebih besar juga dibandingkan dengan
wilayah yang memiliki nilai investasi rendah. Dari plot tersebut juga
bisa diindikasikan bahwa nilai investasi memiliki hubungan linear
positif dengan IPM, yang berarti semakin tinggi nilai investasi maka
nilai IPM juga akan meningkat.
# Keep only the numeric columns; this drops the character `kategori` label
# when it has been added. `select(where(...))` replaces the superseded
# `select_if()`.
data_numerik <- dplyr::select(data, where(is.numeric))
str(data_numerik)
## tibble [34 × 10] (S3: tbl_df/tbl/data.frame)
## $ kendaraan pribadi: num [1:34] 1.74 5.04 1.78 2.89 1.77 ...
## $ IPM : num [1:34] 72.8 72.7 73.3 73.5 72.1 ...
## $ Penduduk : num [1:34] 5407900 15115200 5640600 6614400 2179800 ...
## $ Kemiskinan : num [1:34] 807000 1268000 335000 485000 279000 ...
## $ RLS : num [1:34] 9.44 9.71 9.18 8.22 8.68 ...
## $ PDRB : num [1:34] 38900 62922 50264 151259 76164 ...
## $ Pengeluaran : num [1:34] 9963 10848 11130 11158 10871 ...
## $ Investasi : num [1:34] 4424 22789 2560 43062 8883 ...
## $ Pengangguran : num [1:34] 6.07 5.82 6.22 4.38 4.64 ...
## $ UMR : num [1:34] 3166460 2522610 2522610 2938564 2698941 ...
# Pairwise correlations between the numeric indicators. `data_numerik` already
# contains only numeric columns, so no further column filter is needed.
cor_matrix <- cor(data_numerik)
# Reshape the matrix to long form (Var1, Var2, value) so it can be drawn as tiles.
data_melt <- melt(cor_matrix)

# Plain heatmap using the default continuous fill scale.
ggplot(data_melt, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  labs(
    title = "Correlation Heatmap",
    x = "Variable 1",
    y = "Variable 2"
  )

# Correlogram on a fixed [-1, 1] diverging scale. The midpoint is white so
# that near-zero correlations stand apart from strong ones; low, mid and high
# were previously all shades of blue.
ggplot(data_melt, aes(Var1, Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "steelblue", mid = "white", high = "darkblue",
    midpoint = 0, limits = c(-1, 1), name = "Korelasi"
  ) +
  labs(title = "Correlogram") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
# Correlation matrix of the numeric indicators, rounded to one decimal place.
# ggcorrplot() takes the correlation matrix itself. The previous second
# `cor()` call correlated the columns of that matrix with each other, so the
# plots did not show the correlations between the variables.
data_corrl <- round(cor(data_numerik), 1)
ggcorrplot(data_corrl)
ggcorrplot(data_corrl, method = "circle")
Dari plot-plot di atas, dapat disimpulkan bahwa terdapat indikasi
multikolinieritas antara beberapa variabel X, seperti antara nilai
investasi dan banyaknya kendaraan pribadi. Korelasi yang cukup besar
antara kedua variabel ini, yang ditunjukkan dengan warna yang lebih
gelap atau lebih terang dalam plot, menandakan adanya hubungan yang
signifikan di antara keduanya. Oleh karena itu, dapat disimpulkan bahwa
nilai investasi dan banyaknya kendaraan pribadi saling mempengaruhi
karena adanya korelasi yang signifikan.
# Step-function regression: Sepal.Length is modelled as a separate mean within
# each of three equal-width Petal.Length intervals produced by cut().
mod_tangga <- lm(
  formula = Sepal.Length ~ cut(Petal.Length, 3),
  data = iris
)
summary(mod_tangga)
##
## Call:
## lm(formula = Sepal.Length ~ cut(Petal.Length, 3), data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0111 -0.3111 -0.0060 0.2889 1.2261
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.00600 0.06913 72.409 <2e-16 ***
## cut(Petal.Length, 3)(2.97,4.93] 0.90511 0.09594 9.434 <2e-16 ***
## cut(Petal.Length, 3)(4.93,6.91] 1.66791 0.09987 16.700 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4889 on 147 degrees of freedom
## Multiple R-squared: 0.6562, Adjusted R-squared: 0.6515
## F-statistic: 140.3 on 2 and 147 DF, p-value: < 2.2e-16
# Scatter of the iris data overlaid with the fitted step function (one
# constant level per Petal.Length interval).
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ cut(x, 3),
    # Full argument names and FALSE rather than the reassignable `F`.
    linetype = 1, colour = "blue", se = FALSE
  ) +
  theme_bw()
# Scatter of the iris data with a LOESS smoother and its default confidence band.
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length)) +
  geom_point(color = "blue", size = 3, alpha = 0.6) +
  # Line thickness is set with `linewidth`; `size` for lines has been
  # deprecated since ggplot2 3.4.0 and raised a warning.
  geom_smooth(method = "loess", color = "red", linetype = "dashed", linewidth = 1.5) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "LOESS Visualization of Petal Length vs. Sepal Length",
    subtitle = "Smoothed scatterplot with LOESS curve",
    caption = "Data Iris"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
# Discard the final row of the JNJ series.
# NOTE(review): the reason for dropping the last observation is not visible here -- confirm.
data2 <- utils::head(data2, n = -1)
# Opening price over time, drawn as points only.
ggplot(data = data2, mapping = aes(x = Date, y = Open)) +
  labs(
    title = "Scatter Plot of Time Series Data (JNJ)",
    x = "Date",
    y = "Value (open)"
  ) +
  geom_point()
# Opening price over time: points first, with the connecting line drawn on top.
ggplot(data = data2, mapping = aes(x = Date, y = Open)) +
  geom_point() +
  geom_line() +
  labs(
    title = "Scatter Plot of Time Series Data (JNJ)",
    x = "Date",
    y = "Value (open)"
  )
# Opening price over time as a line only. The title now says "Line Plot"
# because this chart contains no points.
ggplot(data2, aes(x = Date, y = Open)) +
  geom_line() +
  labs(
    title = "Line Plot of Time Series Data (JNJ)",
    x = "Date",
    y = "Value (open)"
  )
# Append a trailing (right-aligned) simple moving average of `Open`.
#
# Args:
#   data2:       data frame with a numeric `Open` column.
#   window_size: number of consecutive observations averaged; a single
#                positive whole number.
# Returns:
#   `data2` with an extra numeric column `ma`. The first `window_size - 1`
#   entries are NA because no full window exists yet; every entry is NA when
#   the window is longer than the series.
#
# The previous version padded `window_size - 1` NAs in front of a result that
# was already NA-padded, which shifted the average `window_size - 1` rows late.
calculate_moving_average <- function(data2, window_size) {
  stopifnot(
    is.numeric(window_size),
    length(window_size) == 1L,
    window_size >= 1,
    window_size == round(window_size)
  )
  n_obs <- nrow(data2)
  if (window_size > n_obs) {
    # stats::filter() rejects a filter longer than the series.
    data2$ma <- rep(NA_real_, n_obs)
    return(data2)
  }
  # Namespace-qualified because dplyr and plotly mask `filter`.
  # `sides = 1` averages the current value and the window_size - 1 before it.
  ma_values <- stats::filter(
    data2$Open,
    rep(1 / window_size, window_size),
    sides = 1
  )
  # Drop the `ts` class so the column is a plain numeric vector.
  data2$ma <- as.numeric(ma_values)
  data2
}
window_size <- 3
data2 <- calculate_moving_average(data2, window_size)
# Raw opening price together with its moving average.
ggplot(data2, aes(x = Date)) +
  # Drawn in grey: a white line is invisible on theme_minimal()'s white
  # background. `linewidth` replaces `size`, which is deprecated for lines.
  geom_line(aes(y = Open), color = "grey40", linewidth = 1) +
  # Smoothed series.
  geom_line(aes(y = ma), color = "steelblue", linetype = "dashed", linewidth = 1) +
  # Shade the area under the moving-average curve.
  geom_ribbon(aes(ymin = -Inf, ymax = ma), fill = "steelblue", alpha = 0.2) +
  labs(
    # paste0() avoids the stray space that paste() put before the closing ")".
    title = paste0("JNJ data with Moving Average (Window Size: ", window_size, ")"),
    x = "Date",
    y = "Value (open)"
  ) +
  theme_minimal()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Base-graphics view of the same series: convert the JNJ opening prices to a
# `ts` object, draw the line, then overlay the individual observations.
data_open <- data2$Open
data.ts <- ts(data_open)
# The labels now name JNJ, the series actually loaded into `data2`; they
# previously said "BBCA".
plot(data.ts, xlab = "Waktu", ylab = "Data Open JNJ", col = "green", main = "Plot Data Open JNJ")
points(data.ts)
# Read the PG price history used for the two-series comparison.
data1 <- readr::read_csv(
  file = "C:/Users/Muhammad Rizqa Salas/Downloads/PG (2).csv"
)
## Rows: 1258 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Re-read the JNJ price history from disk, replacing the working copy of `data2`.
data2 <- readr::read_csv(
  file = "C:/Users/Muhammad Rizqa Salas/Downloads/JNJ (1).csv"
)
## Rows: 1258 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Discard the final row of each series.
data1 <- utils::head(data1, n = -1)
data2 <- utils::head(data2, n = -1)
# Plot both time series on shared axes; each layer maps a constant label to
# colour so the legend can tell the two stocks apart.
ggplot() +
  geom_line(mapping = aes(x = Date, y = Open, colour = "PG"), data = data1) +
  geom_line(mapping = aes(x = Date, y = Open, colour = "JNJ"), data = data2) +
  scale_colour_manual(values = c("PG" = "red", "JNJ" = "green")) +
  labs(
    title = "Perbandingan Dua Data Time Series (PG vs. JNJ)",
    x = "Date",
    y = "Value",
    colour = "Series"
  ) +
  theme_minimal()
Grafik di atas mengilustrasikan perbandingan harga saham pembukaan
(open) antara JNJ dan PG. Pada tahun 2020-2022 pola grafik keduanya
hampir serupa, namun pada tahun 2023-2024 keduanya memiliki pola grafik
yang berbeda. Saham JNJ pada tahun 2023-2024 memiliki pola yang
cenderung menurun, sedangkan saham PG memiliki pola grafik yang
cenderung naik pada interval waktu yang sama.
# Province boundary polygons, read from a shapefile.
my_sf <- sf::read_sf(
  "C:/Users/Muhammad Rizqa Salas/Downloads/BATAS PROVINSI DESEMBER 2019 DUKCAPIL/BATAS PROVINSI DESEMBER 2019 DUKCAPIL/BATAS_PROVINSI_DESEMBER_2019_DUKCAPIL.shp"
)
# Per-province table with columns `PROVINSI` and `persentase`.
data3 <- readxl::read_xlsx(
  path = "C:/Users/Muhammad Rizqa Salas/Downloads/data kendaraan pribadi.xlsx"
)
data3
## # A tibble: 34 × 2
## PROVINSI persentase
## <chr> <dbl>
## 1 ACEH 1.74
## 2 SUMATERA UTARA 5.04
## 3 SUMATERA BARAT 1.78
## 4 RIAU 2.89
## 5 JAMBI 1.77
## 6 SUMATERA SELATAN 2.61
## 7 BENGKULU 0.770
## 8 LAMPUNG 2.74
## 9 KEPULAUAN BANGKA BELITUNG 0.819
## 10 KEPULAUAN RIAU 0.756
## # ℹ 24 more rows
# Upper-case the province names before joining on them.
# NOTE(review): presumably the shapefile's PROVINSI values are upper-case -- confirm.
data3[["PROVINSI"]] <- toupper(data3[["PROVINSI"]])
# Full outer join on PROVINSI: unmatched rows from either side are kept.
Persen.Geo <- merge(
  x = my_sf,
  y = data3,
  by = "PROVINSI",
  all.x = TRUE,
  all.y = TRUE
)
# Choropleth: each province is filled according to `persentase`.
ggplot(data = Persen.Geo) +
  geom_sf(mapping = aes(fill = persentase), color = "#69b3a2") +
  scale_fill_gradient(low = "lightgreen", high = "darkgreen") +
  theme_void()