#Load Package
library(readxl)
library(tidyverse)
## Warning: package 'forcats' was built under R version 4.4.1
## Warning: package 'lubridate' was built under R version 4.4.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(ggplot2)
library(dplyr)
library(broom)
library(ggpubr)
# Import the data set from the Excel workbook into a tibble.
DataTugas2 <- readxl::read_excel(
  path = "~/Documents/S1 Aida/Semester 2 2025/STSI4204 Analisis Visualisasi Data/Tugas 2/Tugas 2 - data_pendapatan.xlsx"
)
# Histogram of the happiness scores (kebahagiaan) with a normal curve overlay.
# Grid of 50 x-values spanning the observed range; `length.out` is spelled out
# in full instead of relying on partial argument matching.
Bahagia <- seq(min(DataTugas2$kebahagiaan), max(DataTugas2$kebahagiaan), length.out = 50)
# Normal density with the sample mean and standard deviation.
fun <- dnorm(Bahagia, mean = mean(DataTugas2$kebahagiaan), sd = sd(DataTugas2$kebahagiaan))
histogram <- hist(DataTugas2$kebahagiaan, main = "Histogram Data Kebahagiaan", xlab = "Data Kebahagiaan", xlim = c(0, 8), ylim = c(0, 80))
# Rescale the density to the count scale of the histogram:
# density * bin width * number of observations.
fun <- fun * diff(histogram$mids[1:2]) * length(DataTugas2$kebahagiaan)
# Print the histogram object (breaks, counts, density, mids).
histogram
## $breaks
##  [1] 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0
## 
## $counts
##  [1]  4 22 27 43 58 47 53 64 54 56 33 24 11  2
## 
## $density
##  [1] 0.016064257 0.088353414 0.108433735 0.172690763 0.232931727 0.188755020
##  [7] 0.212851406 0.257028112 0.216867470 0.224899598 0.132530120 0.096385542
## [13] 0.044176707 0.008032129
## 
## $mids
##  [1] 0.25 0.75 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75 5.25 5.75 6.25 6.75
## 
## $xname
## [1] "DataTugas2$kebahagiaan"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# Overlay the rescaled normal curve on the histogram.
lines(Bahagia, fun, col = "darkred", lwd = 2)
# Write the count above every non-empty bar; the mask is computed once.
bar_terisi <- histogram$counts > 0
text(x = histogram$mids[bar_terisi], y = histogram$counts[bar_terisi], labels = histogram$counts[bar_terisi], pos = 3, cex = 0.8)

# Scatter plot with x = pendapatan (income) and y = kebahagiaan (happiness),
# drawn with semi-transparent green points and a black lowess smoother on top.
with(DataTugas2, {
  plot(
    x = pendapatan,
    y = kebahagiaan,
    main = "Scatterplot Pendapatan dan Kebahagiaan",
    xlab = "Pendapatan",
    ylab = " Kebahagiaan",
    pch = 19,
    col = rgb(red = 0, green = 100, blue = 0, alpha = 100, maxColorValue = 255)
  )
  lines(lowess(x = pendapatan, y = kebahagiaan), col = "black", lwd = 2)
})

# Histogram of income (pendapatan) on the density scale, with a normal curve
# that uses the sample mean and standard deviation.
# `after_stat(density)` replaces the dot-dot notation (`..density..`), which
# was deprecated in ggplot2 3.4.0 and triggered a lifecycle warning.
ggplot(DataTugas2, aes(x = pendapatan)) +
  geom_histogram(aes(y = after_stat(density)), bins = 10, fill = "grey", color = "black") +
  stat_function(fun = dnorm,
                args = list(mean = mean(DataTugas2$pendapatan),
                            sd = sd(DataTugas2$pendapatan)),
                color = "red", linewidth = 1.2) +
  labs(title = "Histogram Pendapatan dengan Kurva Normal",
       x = "Pendapatan",
       y = "Densitas")

# Simple linear regression fitted with lm(): kebahagiaan (happiness) is the
# response and pendapatan (income) is the predictor.
model1 <- lm(formula = kebahagiaan ~ pendapatan, data = DataTugas2)
# tidy() returns a data frame with one row per coefficient, holding the term,
# estimate, standard error, test statistic, and p-value.
model1 |> tidy()
## # A tibble: 2 × 5
##   term        estimate std.error statistic   p.value
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)    0.209    0.0896      2.33 2.01e-  2
## 2 pendapatan     0.712    0.0187     38.1  2.22e-149
# Full model summary: residual quantiles, coefficient table, R-squared, F-test.
model1 |> summary()
## 
## Call:
## lm(formula = kebahagiaan ~ pendapatan, data = DataTugas2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.02292 -0.47796  0.03729  0.44564  2.38002 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.20906    0.08962   2.333   0.0201 *  
## pendapatan   0.71239    0.01870  38.089   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.724 on 496 degrees of freedom
## Multiple R-squared:  0.7452, Adjusted R-squared:  0.7447 
## F-statistic:  1451 on 1 and 496 DF,  p-value: < 2.2e-16
# Diagnostic plots of the fitted model, used here to check homoscedasticity.
plot(x = model1)

# Scatter plot of kebahagiaan (y) against pendapatan (x) with the fitted
# regression line and its equation.
ggplot(DataTugas2, aes(x = pendapatan, y = kebahagiaan)) +
  # Draw the data points.
  geom_point(color = rgb(0, 70, 0, 100, maxColorValue = 255), size = 2) +
  # Add the regression line; the formula is given explicitly so geom_smooth()
  # does not emit its "using formula = 'y ~ x'" message.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE,
              color = "darkred", linewidth = 1.2) +
  # Add the equation and R-squared of the fitted line. The label's vertical
  # position is taken from the y variable (kebahagiaan); it was previously
  # computed from pendapatan, the x variable. after_stat() replaces the
  # deprecated `..eq.label..` / `..rr.label..` notation.
  stat_regline_equation(label.x = 3, label.y = max(DataTugas2$kebahagiaan) * 0.9,
                        aes(label = paste(after_stat(eq.label), after_stat(rr.label), sep = "~~~~"))) +
  # Title and axis labels.
  labs(title = "Pengaruh Pendapatan terhadap Kebahagiaan",
       x = "Pendapatan",
       y = "Kebahagiaan") +
  theme_minimal()

# Store the fitted coefficients as plain (unnamed) numbers.
a <- coef(model1)[[1]]   # Intercept (constant)
b <- coef(model1)[[2]]   # Regression coefficient (slope)

# Build the equation label. The sign of the slope is written as the operator
# so a negative slope reads "Y = a - |b|X" rather than "Y = a + -bX".
eq_label <- sprintf("Y = %.2f %s %.2fX", a, if (b < 0) "-" else "+", abs(b))

# Scatter plot with regression line, confidence band, Pearson correlation,
# and the equation printed in the bottom-right corner.
ggscatter(DataTugas2, x = "pendapatan", y = "kebahagiaan",
          add = "reg.line",          # add the regression line
          conf.int = TRUE,           # draw the confidence band
          cor.coef = TRUE,           # print the correlation coefficient
          cor.method = "pearson",
          xlab = "Pendapatan",       # x-axis label
          ylab = "Kebahagiaan",      # y-axis label
          # `color` is spelled out; `col` only worked via partial matching.
          color = rgb(0, 70, 0, 100, maxColorValue = 255),
          # ggpubr documents color, shape, size, fill and linetype as the
          # add.params keys; `lwd` is not among them, so the line width is
          # passed as `size`.
          add.params = list(color = "darkred", size = 2)) +
  annotate("text", x = Inf, y = -Inf,
           label = eq_label,
           hjust = 1.1, vjust = -1.5,
           color = "black", size = 4, fontface = "bold") +
  ggtitle("Pengaruh Pendapatan terhadap Kebahagiaan") +  # plot title
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

# Base-graphics scatter plot of the data with the fitted line of model1.
# The vectors are extracted so the axis labels read "pendapatan" and
# "kebahagiaan".
pendapatan <- DataTugas2$pendapatan
kebahagiaan <- DataTugas2$kebahagiaan
plot(pendapatan, kebahagiaan)
# Read intercept and slope from the fitted model instead of hand-copying
# rounded values, so the line stays correct if the data or model change.
a <- coef(model1)[["(Intercept)"]]
b <- coef(model1)[["pendapatan"]]
abline(a = a, b = b, col = "red")