Kelompok 8
Andreas Erwin Susanto - 2602202785
Dennis - 2602100644
Marvel Shallom Isaiah - 2602098766
Vicky John - 2602096155
Instansi : BINUS University, Alam Sutera
Nama Mata Kuliah : Visual Analytics and Application (LB55)
# Attach the workbook reader first so the dataset can be imported right away.
library(readxl)

# NOTE(review): absolute, user-specific path -- the script only runs on a
# machine where this exact file exists. Consider a project-relative path.
Visual <- read_excel("C:/Users/denni/OneDrive - Bina Nusantara/BINUS/5th Semester/Visual Analytics and Application/assignment/Visual.xlsx")

# Packages used by the rest of the analysis.
packages <- c("dplyr", "plotly", "readxl", "ggplot2", "pscl", "corrplot", "leaps", "car", "lmtest")

# Install whatever is missing, then attach every package.
# requireNamespace() only checks availability (no attach, no warning on a
# missing package); library() then fails loudly if attaching is still
# impossible. invisible() keeps the lapply() result from being printed.
invisible(lapply(packages, function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}))
# Open the data viewer only when a user is present; View() is an interactive
# tool and should not be triggered when the script is run in batch mode.
if (interactive()) {
  View(Visual)
}
head(Visual) # Inspect the first few rows
## # A tibble: 6 × 12
## CN T CP LP G EPS MC PER PBV DER ROE PI
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ava R… AVA 0.238 0.230 0.0353 0.135 6.07e7 0.963 1.35 0.0109 -0.442 1
## 2 Seven… SWM 0.395 0.645 -0.388 0.124 6.22e8 1.33 0.629 1.95 -0.524 0
## 3 Heali… HLS 2.84 4.89 -0.419 0.484 2.08e9 2.77 0.914 0.926 0.0293 0
## 4 Tabco… TAH 1.08 0.955 0.125 0.163 2.45e9 3.40 0.989 0.117 0.0075 1
## 5 Enero… EGG 2.82 4 -0.295 0.309 2.61e8 3.66 0.658 0.289 0 0
## 6 Iluka… ILU 9.53 9.89 -0.0364 1.38 4.05e9 3.98 1.05 0.0330 0 0
# Compact per-column overview of the dataset (type, length, leading values).
utils::str(object = Visual)
## tibble [105 × 12] (S3: tbl_df/tbl/data.frame)
## $ CN : chr [1:105] "Ava Risk Group Ltd" "Seven West Media Ltd" "Healius Ltd" "Tabcorp Holdings Ltd" ...
## $ T : chr [1:105] "AVA" "SWM" "HLS" "TAH" ...
## $ CP : num [1:105] 0.238 0.395 2.842 1.075 2.82 ...
## $ LP : num [1:105] 0.23 0.645 4.887 0.955 4 ...
## $ G : num [1:105] 0.0353 -0.3876 -0.4186 0.1253 -0.295 ...
## $ EPS: num [1:105] 0.135 0.124 0.484 0.163 0.309 ...
## $ MC : num [1:105] 6.07e+07 6.22e+08 2.08e+09 2.45e+09 2.61e+08 ...
## $ PER: num [1:105] 0.963 1.331 2.768 3.405 3.657 ...
## $ PBV: num [1:105] 1.347 0.629 0.914 0.989 0.658 ...
## $ DER: num [1:105] 0.0109 1.9499 0.9258 0.1169 0.2889 ...
## $ ROE: num [1:105] -0.4423 -0.5235 0.0293 0.0075 0 ...
## $ PI : num [1:105] 1 0 0 1 0 0 1 0 0 0 ...
# Storage type of every column. vapply() guarantees a named character vector,
# whereas sapply()'s return type depends on its input.
vapply(Visual, typeof, character(1))
## CN T CP LP G EPS
## "character" "character" "double" "double" "double" "double"
## MC PER PBV DER ROE PI
## "double" "double" "double" "double" "double" "double"
# Total number of missing cells across the whole table.
length(which(is.na(Visual)))
## [1] 0
# Treat the ticker symbol as a categorical variable.
Visual$T <- as.factor(Visual$T)

# Add a z-scored copy (<name>_z) of each financial metric, then drop the raw
# versions that are no longer needed. MC and PER are kept here because the raw
# values are still used further down (log market-cap plot) before being removed.
#
# scale() returns a one-column matrix; as.numeric() flattens it so every *_z
# column is a plain numeric vector. Without this the columns are stored as
# matrices and show up as e.g. "CP_z.V1" in summary().
Visual <- Visual %>%
  mutate(across(
    c(EPS, CP, MC, LP, PER, PBV, DER, ROE),
    ~ as.numeric(scale(.x)),
    .names = "{.col}_z"
  )) %>%
  select(-EPS, -CP, -LP, -PBV, -DER, -ROE)
# Scatter plot of standardized current price (CP_z) against standardized last
# price (LP_z); each point is coloured by growth (G) on a blue-to-red gradient.
cplpplot <- ggplot(data = Visual, mapping = aes(x = CP_z, y = LP_z, color = G)) +
  geom_point(alpha = 0.7, size = 3) +
  theme_minimal() +
  theme(
    legend.title = element_text(size = 12),
    axis.title = element_text(size = 12),
    plot.subtitle = element_text(face = "italic", size = 12, hjust = 0.5),
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5)
  ) +
  scale_color_gradient(high = "red", low = "blue") +
  labs(
    color = "Growth (G)",
    y = "Last Price (LP)",
    x = "Current Price (CP)",
    subtitle = "Dengan Pertumbuhan (G) sebagai Faktor Warna",
    title = "Visualisasi Hubungan Current Price dan Last Price"
  )

# Render the plot, then write that same plot object to disk.
print(cplpplot)
ggsave(filename = "CPLP_z_Distribution.png", plot = cplpplot)
# Make sure EPS_z is a plain numeric vector before plotting.
Visual$EPS_z <- as.numeric(Visual$EPS_z)

# Pre-compute the 30-bin histogram so its per-bin x positions and counts can be
# joined by a line drawn on top of the bars.
hist_data <- ggplot2::ggplot_build(
  ggplot(Visual, aes(x = EPS_z)) +
    geom_histogram(bins = 30)
)$data[[1]]

# Histogram of standardized EPS with an overlaid frequency line and reference
# lines for the mean (green, dashed) and median (blue, dotted).
# - `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
# - The mean/median are constants, so they are passed directly as `xintercept`
#   instead of through aes(), which would draw one identical line per data row.
epshistogram <- ggplot(Visual, aes(x = EPS_z)) +
  geom_histogram(
    bins = 30,
    color = "black",
    fill = "cyan",
    alpha = 0.7
  ) +
  geom_line(
    data = data.frame(
      x = hist_data$x,
      y = hist_data$y
    ),
    aes(x = x, y = y),
    color = "purple4",
    linewidth = 1
  ) +
  geom_vline(
    xintercept = mean(Visual$EPS_z, na.rm = TRUE),
    color = "green",
    linetype = "dashed",
    linewidth = 1
  ) +
  geom_vline(
    xintercept = median(Visual$EPS_z, na.rm = TRUE),
    color = "blue",
    linetype = "dotted",
    linewidth = 1
  ) +
  labs(
    title = "Distribusi EPS",
    subtitle = "Histogram dengan Rata-rata (Hijau), Median (Biru), dan Poligon Frekuensi (Ungu)",
    x = "Earnings Per Share (EPS)",
    y = "Frekuensi"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 12, face = "italic"),
    axis.title = element_text(size = 12)
  )
epshistogram

# Save this specific plot rather than relying on the implicit "last plot".
ggsave("EPS_z_Distribution.png", plot = epshistogram)
# Base-10 logarithm of the raw market capitalisation.
dataMC <- as.numeric(log10(Visual$MC))

# Histogram + frequency polygon of log market cap with mean (dashed) and median
# (dotted) reference lines.
# - aes() now maps the data-frame column `MC`; the original mapped `dataMC`,
#   which is not a column and only worked by falling back to the global vector.
# - `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
# - Constant intercepts are passed outside aes() so a single line is drawn.
mc_plot <- ggplot(data = data.frame(MC = dataMC), aes(x = MC)) +
  geom_histogram(
    bins = 13,
    color = "black",
    fill = "blue1",
    alpha = 0.7
  ) +
  geom_freqpoly(
    bins = 13, # same binning as the histogram
    color = "darkblue",
    linewidth = 1
  ) +
  geom_vline(
    xintercept = mean(dataMC, na.rm = TRUE),
    color = "chartreuse",
    linetype = "dashed",
    linewidth = 1
  ) +
  geom_vline(
    xintercept = median(dataMC, na.rm = TRUE),
    color = "coral",
    linetype = "dotted",
    linewidth = 1
  ) +
  labs(
    title = "Distribusi Log Market Cap untuk Perusahaan di Australia",
    subtitle = "Histogram dengan Frequency Polygon, Rata-Rata (Green), dan Median (Orange)",
    x = "Logged Market Cap",
    y = "Frekuensi"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 12, face = "italic"),
    axis.title = element_text(size = 12)
  )
mc_plot

# Save this specific plot rather than relying on the implicit "last plot".
ggsave("MC_Distribution.png", plot = mc_plot, width = 8, height = 6, dpi = 300)
# Standardized PER as a plain numeric vector.
dataPER <- as.numeric(Visual$PER_z)

# Histogram + frequency polygon of standardized PER with mean (dashed) and
# median (dotted) reference lines.
# `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
per_plot <- ggplot(data = data.frame(dataPER = dataPER), aes(x = dataPER)) +
  geom_histogram(
    bins = 10,
    color = "black",
    fill = "cyan",
    alpha = 0.7
  ) +
  geom_freqpoly(
    bins = 10, # Ensure the number of bins matches the histogram
    color = "darkblue",
    linewidth = 1
  ) +
  # Vertical line for the mean (constant, so passed outside aes())
  geom_vline(
    xintercept = mean(dataPER, na.rm = TRUE),
    color = "chartreuse4",
    linetype = "dashed",
    linewidth = 1
  ) +
  # Vertical line for the median (constant, so passed outside aes())
  geom_vline(
    xintercept = median(dataPER, na.rm = TRUE),
    color = "darkorange",
    linetype = "dotted",
    linewidth = 1
  ) +
  # Labels and titles
  labs(
    title = "Distribusi PER untuk Perusahaan di Australia",
    subtitle = "Histogram dengan Frequency Polygon, Rata-Rata (Green), dan Median (Orange)",
    x = "Price Earnings Ratio",
    y = "Frekuensi"
  ) +
  # Minimal theme with centred title/subtitle
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 12, face = "italic"),
    axis.title = element_text(size = 12)
  )
per_plot

# Every other distribution plot in this script is written to disk; the PER plot
# was the only one that was not. File name follows the "<VAR>_z_Distribution"
# pattern used by the other plots.
ggsave("PER_z_Distribution.png", plot = per_plot, width = 8, height = 6, dpi = 300)
library(ggplot2)
# Build a histogram of `values` with a frequency polygon and dashed/dotted
# reference lines at the mean and median.
#
# Arguments:
#   values       numeric vector to plot.
#   title        plot title.
#   subtitle     plot subtitle.
#   x_label      x-axis label.
#   fill         fill colour of the histogram bars.
#   poly_color   colour of the frequency polygon.
#   mean_color   colour of the dashed mean line.
#   median_color colour of the dotted median line.
#   bins         number of bins shared by histogram and polygon (default 30).
#
# Returns the ggplot object; nothing is drawn or saved here.
#
# Notes:
# - `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
# - Mean/median are constants, so they are passed as `xintercept` directly
#   rather than through aes(), which would draw one identical line per row.
plot_z_distribution <- function(values, title, subtitle, x_label,
                                fill, poly_color, mean_color, median_color,
                                bins = 30) {
  ggplot(data = data.frame(value = values), aes(x = value)) +
    geom_histogram(
      bins = bins,
      color = "black",
      fill = fill,
      alpha = 0.7
    ) +
    geom_freqpoly(
      bins = bins,
      color = poly_color,
      linewidth = 1
    ) +
    geom_vline(
      xintercept = mean(values, na.rm = TRUE),
      color = mean_color,
      linetype = "dashed",
      linewidth = 1
    ) +
    geom_vline(
      xintercept = median(values, na.rm = TRUE),
      color = median_color,
      linetype = "dotted",
      linewidth = 1
    ) +
    labs(
      title = title,
      subtitle = subtitle,
      x = x_label,
      y = "Frekuensi"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
      plot.subtitle = element_text(hjust = 0.5, size = 12, face = "italic"),
      axis.title = element_text(size = 12)
    )
}

# --- PBV ---------------------------------------------------------------------
pbv_z_data <- as.numeric(Visual$PBV_z)
pbv_plot <- plot_z_distribution(
  pbv_z_data,
  title = "Distribusi PBV untuk Perusahaan di Australia",
  subtitle = "Histogram dengan Frequency Polygon, Rata-Rata (Dark Green), dan Median (Blue)",
  x_label = "Price-to-Book Value (PBV)",
  fill = "lightblue",
  poly_color = "purple",
  mean_color = "darkgreen",
  median_color = "blue"
)
pbv_plot
ggsave("PBV_z_Distribution.png", plot = pbv_plot, width = 8, height = 6, dpi = 300)

# --- DER ---------------------------------------------------------------------
der_z_data <- as.numeric(Visual$DER_z)
der_plot <- plot_z_distribution(
  der_z_data,
  title = "Distribusi DER untuk Perusahaan di Australia",
  subtitle = "Histogram dengan Frequency Polygon, Rata-Rata (Orange), dan Median (Blue)",
  x_label = "Debt-to-Equity Ratio (DER)",
  fill = "lightyellow",
  poly_color = "darkgreen",
  mean_color = "orange",
  median_color = "blue"
)
der_plot
ggsave("DER_z_Distribution.png", plot = der_plot, width = 8, height = 6, dpi = 300)

# --- ROE ---------------------------------------------------------------------
roe_z_data <- as.numeric(Visual$ROE_z)
roe_plot <- plot_z_distribution(
  roe_z_data,
  title = "Distribusi ROE untuk Perusahaan di Australia",
  subtitle = "Histogram dengan Frequency Polygon, Rata-Rata (Purple), dan Median (Cyan)",
  x_label = "Return on Equity (ROE)",
  fill = "lightpink",
  poly_color = "navy",
  mean_color = "purple",
  median_color = "cyan"
)
roe_plot
ggsave("ROE_z_Distribution.png", plot = roe_plot, width = 8, height = 6, dpi = 300)
# The raw MC and PER columns are no longer needed; remove them and print the
# per-column summary of what remains.
Visual <- select(Visual, -c(MC, PER))
print(summary(Visual))
## CN T G PI
## Length:105 A1N : 1 Min. :-0.57910 Min. :0.0000
## Class :character ABG : 1 1st Qu.:-0.28851 1st Qu.:0.0000
## Mode :character AGL : 1 Median :-0.09510 Median :0.0000
## ALK : 1 Mean :-0.05918 Mean :0.3429
## ALL : 1 3rd Qu.: 0.09935 3rd Qu.:1.0000
## ANN : 1 Max. : 2.05804 Max. :1.0000
## (Other):99
## EPS_z CP_z.V1 MC_z.V1 LP_z.V1
## Min. :-0.5637 Min. :-0.492033 Min. :-0.352372 Min. :-0.499429
## 1st Qu.:-0.4938 1st Qu.:-0.453935 1st Qu.:-0.332911 1st Qu.:-0.462319
## Median :-0.3918 Median :-0.351141 Median :-0.297356 Median :-0.375824
## Mean : 0.0000 Mean : 0.000000 Mean : 0.000000 Mean : 0.000000
## 3rd Qu.: 0.1065 3rd Qu.: 0.029436 3rd Qu.:-0.178401 3rd Qu.:-0.038470
## Max. : 5.4386 Max. : 6.344175 Max. : 7.246862 Max. : 5.766424
##
## PER_z.V1 PBV_z.V1 DER_z.V1
## Min. :-0.360501 Min. :-0.654982 Min. :-0.676339
## 1st Qu.:-0.264081 1st Qu.:-0.468269 1st Qu.:-0.529013
## Median :-0.158884 Median :-0.289244 Median :-0.315978
## Mean : 0.000000 Mean : 0.000000 Mean : 0.000000
## 3rd Qu.:-0.040798 3rd Qu.:-0.020934 3rd Qu.:-0.069160
## Max. : 9.509412 Max. : 5.585594 Max. : 5.189499
##
## ROE_z.V1
## Min. :-7.234575
## 1st Qu.: 0.198013
## Median : 0.198013
## Mean : 0.000000
## 3rd Qu.: 0.205099
## Max. : 1.846078
##
# Keep only the numeric columns. vapply() guarantees a logical mask of length
# ncol(Visual); sapply()'s return type depends on its input.
is_numeric_col <- vapply(Visual, is.numeric, logical(1))
numeric_data <- Visual[, is_numeric_col]

# Pairwise Pearson correlations, computed on rows without missing values.
corr_matrix <- cor(numeric_data, use = "complete.obs", method = "pearson")
corr_matrix
## G PI EPS_z CP_z MC_z
## G 1.000000000 0.711213400 0.027816975 0.003032789 0.12595367
## PI 0.711213400 1.000000000 0.002788379 -0.072380820 0.24206889
## EPS_z 0.027816975 0.002788379 1.000000000 0.756256626 0.65077790
## CP_z 0.003032789 -0.072380820 0.756256626 1.000000000 0.48657002
## MC_z 0.125953673 0.242068894 0.650777904 0.486570021 1.00000000
## LP_z -0.070001759 -0.142871744 0.737065087 0.980817620 0.41940177
## PER_z -0.075437461 -0.095132663 -0.058876719 0.054518443 -0.02627452
## PBV_z -0.069284636 -0.148158189 0.164970455 0.403778952 0.08529233
## DER_z 0.006456890 0.016380029 0.210467532 0.175817414 0.17435222
## ROE_z -0.099095069 -0.008387892 0.084528248 0.108941732 0.06151069
## LP_z PER_z PBV_z DER_z ROE_z
## G -0.07000176 -0.07543746 -0.06928464 0.00645689 -0.099095069
## PI -0.14287174 -0.09513266 -0.14815819 0.01638003 -0.008387892
## EPS_z 0.73706509 -0.05887672 0.16497046 0.21046753 0.084528248
## CP_z 0.98081762 0.05451844 0.40377895 0.17581741 0.108941732
## MC_z 0.41940177 -0.02627452 0.08529233 0.17435222 0.061510695
## LP_z 1.00000000 0.06495393 0.46556272 0.20245191 0.114891742
## PER_z 0.06495393 1.00000000 0.27418780 -0.04340677 0.098899123
## PBV_z 0.46556272 0.27418780 1.00000000 -0.01118179 0.163331510
## DER_z 0.20245191 -0.04340677 -0.01118179 1.00000000 0.017209250
## ROE_z 0.11489174 0.09889912 0.16333151 0.01720925 1.000000000
# Show the correlation plot on the current graphics device.
corrplot(corr_matrix, method = "circle")

# corrplot() draws with base graphics, so ggsave() cannot capture it -- ggsave()
# writes the most recent *ggplot* instead, which put an unrelated histogram into
# Correlation_Matrix.png. Redraw the correlation plot on a png() device so the
# file actually contains it. 7 x 5 inches matches ggsave()'s reported size.
png("Correlation_Matrix.png", width = 7, height = 5, units = "in", res = 300)
corrplot(corr_matrix, method = "circle")
invisible(dev.off())
## Saving 7 x 5 in image
# Best-subset search over the six standardized predictors of PI, keeping the
# single best model of each size.
# NOTE(review): regsubsets() presumably scores subsets with a linear model,
# while PI is binary and the models fitted afterwards are logistic -- confirm
# that this screening step is intended.
subsets_model <- regsubsets(
  PI ~ EPS_z + MC_z + PER_z + PBV_z + DER_z + ROE_z,
  data = Visual,
  nbest = 1
)
print(summary(subsets_model))
## Subset selection object
## Call: regsubsets.formula(PI ~ EPS_z + MC_z + PER_z + PBV_z + DER_z +
## ROE_z, data = Visual, nbest = 1)
## 6 Variables (and intercept)
## Forced in Forced out
## EPS_z FALSE FALSE
## MC_z FALSE FALSE
## PER_z FALSE FALSE
## PBV_z FALSE FALSE
## DER_z FALSE FALSE
## ROE_z FALSE FALSE
## 1 subsets of each size up to 6
## Selection Algorithm: exhaustive
## EPS_z MC_z PER_z PBV_z DER_z ROE_z
## 1 ( 1 ) " " "*" " " " " " " " "
## 2 ( 1 ) "*" "*" " " " " " " " "
## 3 ( 1 ) "*" "*" " " "*" " " " "
## 4 ( 1 ) "*" "*" "*" "*" " " " "
## 5 ( 1 ) "*" "*" "*" "*" " " "*"
## 6 ( 1 ) "*" "*" "*" "*" "*" "*"
# Logistic regression of PI on the three-variable subset (EPS, MC, PBV).
logit_model3 <- glm(
  formula = PI ~ EPS_z + MC_z + PBV_z,
  family = binomial(),
  data = Visual
)
print(summary(logit_model3))
##
## Call:
## glm(formula = PI ~ EPS_z + MC_z + PBV_z, family = binomial(),
## data = Visual)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7165 0.2468 -2.904 0.00369 **
## EPS_z -1.6000 0.7211 -2.219 0.02650 *
## MC_z 2.5428 0.9797 2.596 0.00944 **
## PBV_z -0.6663 0.4304 -1.548 0.12166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 135.01 on 104 degrees of freedom
## Residual deviance: 115.32 on 101 degrees of freedom
## AIC: 123.32
##
## Number of Fisher Scoring iterations: 6
# Logistic regression of PI on the four-variable subset (adds PER).
logit_model4 <- glm(
  formula = PI ~ EPS_z + MC_z + PER_z + PBV_z,
  family = binomial(),
  data = Visual
)
print(summary(logit_model4))
##
## Call:
## glm(formula = PI ~ EPS_z + MC_z + PER_z + PBV_z, family = binomial(),
## data = Visual)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7268 0.2509 -2.897 0.00377 **
## EPS_z -1.6611 0.7316 -2.271 0.02318 *
## MC_z 2.5981 0.9921 2.619 0.00882 **
## PER_z -0.2806 0.4928 -0.569 0.56906
## PBV_z -0.5891 0.4504 -1.308 0.19094
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 135.01 on 104 degrees of freedom
## Residual deviance: 114.61 on 100 degrees of freedom
## AIC: 124.61
##
## Number of Fisher Scoring iterations: 6
# Logistic regression of PI on the five-variable subset (adds ROE).
logit_model5 <- glm(
  formula = PI ~ EPS_z + MC_z + PER_z + PBV_z + ROE_z,
  family = binomial(),
  data = Visual
)
print(summary(logit_model5))
##
## Call:
## glm(formula = PI ~ EPS_z + MC_z + PER_z + PBV_z + ROE_z, family = binomial(),
## data = Visual)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.72674 0.25090 -2.897 0.00377 **
## EPS_z -1.66141 0.73072 -2.274 0.02299 *
## MC_z 2.59560 0.99112 2.619 0.00882 **
## PER_z -0.28468 0.50061 -0.569 0.56957
## PBV_z -0.59121 0.45067 -1.312 0.18957
## ROE_z 0.02542 0.21856 0.116 0.90741
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 135.01 on 104 degrees of freedom
## Residual deviance: 114.60 on 99 degrees of freedom
## AIC: 126.6
##
## Number of Fisher Scoring iterations: 6
# Logistic regression of PI on all six standardized predictors (adds DER).
logit_model6 <- glm(
  formula = PI ~ EPS_z + MC_z + PER_z + PBV_z + DER_z + ROE_z,
  family = binomial(),
  data = Visual
)
print(summary(logit_model6))
##
## Call:
## glm(formula = PI ~ EPS_z + MC_z + PER_z + PBV_z + DER_z + ROE_z,
## family = binomial(), data = Visual)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.74192 0.25381 -2.923 0.00347 **
## EPS_z -1.73746 0.73390 -2.367 0.01791 *
## MC_z 2.69541 0.98834 2.727 0.00639 **
## PER_z -0.31860 0.56742 -0.561 0.57447
## PBV_z -0.54511 0.44759 -1.218 0.22327
## DER_z -0.20182 0.29462 -0.685 0.49333
## ROE_z 0.02456 0.21641 0.114 0.90963
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 135.01 on 104 degrees of freedom
## Residual deviance: 114.08 on 98 degrees of freedom
## AIC: 128.08
##
## Number of Fisher Scoring iterations: 6
# Pseudo R-squared measures (McFadden, r2ML, r2CU, ...) for the
# three-predictor logistic model.
pseudo_r2 <- pscl::pR2(logit_model3)
# Show the computed measures.
print(pseudo_r2)
## llh llhNull G2 McFadden r2ML r2CU
## -57.6591304 -67.5058062 19.6933515 0.1458641 0.1710171 0.2363494
# 1. Multicollinearity
# Variance inflation factor of each predictor in the three-predictor model.
vif_values <- car::vif(mod = logit_model3)
print(x = "VIF Values for logit_model3:")
print(x = vif_values)
## EPS_z MC_z PBV_z
## 5.793816 5.979891 1.080899
# 3. Independence of Observations (Durbin-Watson Test)
# NOTE(review): the Durbin-Watson statistic depends on row order; confirm that
# the ordering of companies in `Visual` is meaningful for this check.
dw_test <- lmtest::dwtest(logit_model3)
print(x = "Durbin-Watson Test for logit_model3:")
print(x = dw_test)
##
## Durbin-Watson test
##
## data: logit_model3
## DW = 1.7461, p-value = 0.08873
## alternative hypothesis: true autocorrelation is greater than 0