Pendidikan global mencakup upaya untuk memahami dan meningkatkan sistem pendidikan di seluruh dunia. Hal ini melibatkan analisis terhadap tantangan dan peluang pendidikan di berbagai negara, serta upaya untuk meningkatkan akses, kualitas, dan relevansi pendidikan secara global. Dataset Kaggle https://www.kaggle.com/datasets/nelgiriyewithana/world-educational-data menyediakan data yang dapat membantu kita menganalisis berbagai isu dan tren yang memengaruhi pendidikan global, dengan perhatian pada semua tingkatan pendidikan, mulai dari pendidikan anak usia dini hingga pendidikan tinggi.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'forcats' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the world-education CSV into a tibble; show_col_types = FALSE
# suppresses readr's column-specification message.
# NOTE(review): many indicators have a minimum of 0 in the summary below,
# which looks like 0 is used for missing values -- confirm against the
# dataset documentation before interpreting means/medians.
global_education <- read_csv(
  "Global_Education.csv",
  show_col_types = FALSE
)
# Auto-print the tibble preview.
global_education
## # A tibble: 202 × 29
## `Countries and areas` Latitude Longitude OOSR_Pre0Primary_Age_Male
## <chr> <dbl> <dbl> <dbl>
## 1 Afghanistan 33.9 67.7 0
## 2 Albania 41.2 20.2 4
## 3 Algeria 28.0 1.66 0
## 4 Andorra 42.5 1.52 0
## 5 Angola 11.2 17.9 31
## 6 Anguilla 18.2 63.1 14
## 7 Antigua and Barbuda 17.1 61.8 14
## 8 Argentina 38.4 63.6 2
## 9 Armenia 40.1 45.0 52
## 10 Australia 25.3 134. 13
## # ℹ 192 more rows
## # ℹ 25 more variables: OOSR_Pre0Primary_Age_Female <dbl>,
## # OOSR_Primary_Age_Male <dbl>, OOSR_Primary_Age_Female <dbl>,
## # OOSR_Lower_Secondary_Age_Male <dbl>, OOSR_Lower_Secondary_Age_Female <dbl>,
## # OOSR_Upper_Secondary_Age_Male <dbl>, OOSR_Upper_Secondary_Age_Female <dbl>,
## # Completion_Rate_Primary_Male <dbl>, Completion_Rate_Primary_Female <dbl>,
## # Completion_Rate_Lower_Secondary_Male <dbl>, …
Fungsi glimpse digunakan untuk mendapatkan gambaran umum
data, seperti tipe data
(dbl, int, chr, factor, lgl),
cuplikan amatan-amatan awal, banyaknya baris, dan banyaknya kolom.
# Compact overview: row/column counts plus each column's type and first values.
global_education %>% glimpse()
## Rows: 202
## Columns: 29
## $ `Countries and areas` <chr> "Afghanistan", "Albania", "Alg…
## $ Latitude <dbl> 33.93911, 41.15333, 28.03389, …
## $ Longitude <dbl> 67.709953, 20.168331, 1.659626…
## $ OOSR_Pre0Primary_Age_Male <dbl> 0, 4, 0, 0, 31, 14, 14, 2, 52,…
## $ OOSR_Pre0Primary_Age_Female <dbl> 0, 2, 0, 0, 39, 0, 4, 2, 50, 1…
## $ OOSR_Primary_Age_Male <dbl> 0, 6, 0, 0, 0, 0, 4, 0, 9, 0, …
## $ OOSR_Primary_Age_Female <dbl> 0, 3, 0, 0, 0, 0, 1, 0, 9, 0, …
## $ OOSR_Lower_Secondary_Age_Male <dbl> 0, 6, 0, 0, 0, 0, 1, 0, 11, 2,…
## $ OOSR_Lower_Secondary_Age_Female <dbl> 0, 1, 0, 0, 0, 0, 2, 0, 9, 3, …
## $ OOSR_Upper_Secondary_Age_Male <dbl> 44, 21, 0, 0, 0, 0, 14, 15, 16…
## $ OOSR_Upper_Secondary_Age_Female <dbl> 69, 15, 0, 0, 0, 0, 12, 7, 4, …
## $ Completion_Rate_Primary_Male <dbl> 67, 94, 93, 0, 63, 0, 0, 91, 9…
## $ Completion_Rate_Primary_Female <dbl> 40, 96, 93, 0, 57, 0, 0, 94, 9…
## $ Completion_Rate_Lower_Secondary_Male <dbl> 49, 98, 49, 0, 42, 0, 0, 70, 9…
## $ Completion_Rate_Lower_Secondary_Female <dbl> 26, 97, 65, 0, 32, 0, 0, 79, 9…
## $ Completion_Rate_Upper_Secondary_Male <dbl> 32, 76, 22, 0, 24, 0, 0, 46, 6…
## $ Completion_Rate_Upper_Secondary_Female <dbl> 14, 80, 37, 0, 15, 0, 0, 53, 7…
## $ Grade_2_3_Proficiency_Reading <dbl> 22, 0, 0, 0, 0, 0, 0, 76, 0, 9…
## $ Grade_2_3_Proficiency_Math <dbl> 25, 0, 0, 0, 0, 0, 0, 71, 0, 7…
## $ Primary_End_Proficiency_Reading <dbl> 13, 0, 0, 0, 0, 0, 0, 46, 0, 0…
## $ Primary_End_Proficiency_Math <dbl> 11, 0, 0, 0, 0, 0, 0, 56, 55, …
## $ Lower_Secondary_End_Proficiency_Reading <dbl> 0, 48, 21, 0, 0, 0, 0, 48, 0, …
## $ Lower_Secondary_End_Proficiency_Math <dbl> 0, 58, 19, 0, 0, 0, 0, 31, 50,…
## $ Youth_15_24_Literacy_Rate_Male <dbl> 74, 99, 98, 0, 0, 0, 0, 99, 0,…
## $ Youth_15_24_Literacy_Rate_Female <dbl> 56, 100, 97, 0, 0, 0, 0, 100, …
## $ Birth_Rate <dbl> 32.49, 11.78, 24.28, 7.20, 40.…
## $ Gross_Primary_Education_Enrollment <dbl> 104.0, 107.0, 109.9, 106.4, 11…
## $ Gross_Tertiary_Education_Enrollment <dbl> 9.7, 55.0, 51.4, 0.0, 9.3, 0.0…
## $ Unemployment_Rate <dbl> 11.12, 12.33, 11.70, 0.00, 6.8…
# Open the data in the viewer (direct call so the viewer title stays the
# object name), then show the first six rows.
view(global_education)
head(global_education, n = 6L)
## # A tibble: 6 × 29
## `Countries and areas` Latitude Longitude OOSR_Pre0Primary_Age_Male
## <chr> <dbl> <dbl> <dbl>
## 1 Afghanistan 33.9 67.7 0
## 2 Albania 41.2 20.2 4
## 3 Algeria 28.0 1.66 0
## 4 Andorra 42.5 1.52 0
## 5 Angola 11.2 17.9 31
## 6 Anguilla 18.2 63.1 14
## # ℹ 25 more variables: OOSR_Pre0Primary_Age_Female <dbl>,
## # OOSR_Primary_Age_Male <dbl>, OOSR_Primary_Age_Female <dbl>,
## # OOSR_Lower_Secondary_Age_Male <dbl>, OOSR_Lower_Secondary_Age_Female <dbl>,
## # OOSR_Upper_Secondary_Age_Male <dbl>, OOSR_Upper_Secondary_Age_Female <dbl>,
## # Completion_Rate_Primary_Male <dbl>, Completion_Rate_Primary_Female <dbl>,
## # Completion_Rate_Lower_Secondary_Male <dbl>,
## # Completion_Rate_Lower_Secondary_Female <dbl>, …
# Per-column summary statistics (min, quartiles, mean, max for numeric columns).
global_education %>% summary()
## Countries and areas Latitude Longitude
## Length:202 Min. : 0.02356 Min. : 0.8248
## Class :character 1st Qu.:11.68506 1st Qu.: 18.6657
## Mode :character Median :21.20786 Median : 43.5181
## Mean :25.08142 Mean : 55.1669
## 3rd Qu.:39.90179 3rd Qu.: 77.6850
## Max. :64.96305 Max. :178.0650
## OOSR_Pre0Primary_Age_Male OOSR_Pre0Primary_Age_Female OOSR_Primary_Age_Male
## Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000
## Median : 9.00 Median : 7.00 Median : 1.000
## Mean :19.66 Mean :19.28 Mean : 5.282
## 3rd Qu.:31.00 3rd Qu.:30.00 3rd Qu.: 6.000
## Max. :96.00 Max. :96.00 Max. :58.000
## OOSR_Primary_Age_Female OOSR_Lower_Secondary_Age_Male
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 1.000 Median : 2.000
## Mean : 5.569 Mean : 8.708
## 3rd Qu.: 6.750 3rd Qu.:12.750
## Max. :67.000 Max. :61.000
## OOSR_Lower_Secondary_Age_Female OOSR_Upper_Secondary_Age_Male
## Min. : 0.000 Min. : 0.00
## 1st Qu.: 0.000 1st Qu.: 0.25
## Median : 2.000 Median :15.00
## Mean : 8.832 Mean :20.29
## 3rd Qu.:10.750 3rd Qu.:32.75
## Max. :70.000 Max. :84.00
## OOSR_Upper_Secondary_Age_Female Completion_Rate_Primary_Male
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.25 1st Qu.: 0.00
## Median :12.00 Median : 37.50
## Mean :19.98 Mean : 41.72
## 3rd Qu.:30.00 3rd Qu.: 87.50
## Max. :89.00 Max. :100.00
## Completion_Rate_Primary_Female Completion_Rate_Lower_Secondary_Male
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 33.00 Median : 18.50
## Mean : 42.13 Mean : 32.74
## 3rd Qu.: 92.00 3rd Qu.: 64.75
## Max. :100.00 Max. :100.00
## Completion_Rate_Lower_Secondary_Female Completion_Rate_Upper_Secondary_Male
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 12.00 Median : 9.50
## Mean : 33.17 Mean : 22.68
## 3rd Qu.: 70.75 3rd Qu.: 40.00
## Max. :100.00 Max. :100.00
## Completion_Rate_Upper_Secondary_Female Grade_2_3_Proficiency_Reading
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 5.50 Median : 0.00
## Mean : 23.07 Mean :21.98
## 3rd Qu.: 38.75 3rd Qu.:38.75
## Max. :100.00 Max. :99.00
## Grade_2_3_Proficiency_Math Primary_End_Proficiency_Reading
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 0.00 Median : 0.00
## Mean :17.44 Mean :10.72
## 3rd Qu.:32.75 3rd Qu.: 0.00
## Max. :97.00 Max. :99.00
## Primary_End_Proficiency_Math Lower_Secondary_End_Proficiency_Reading
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 0.00 Median : 0.00
## Mean :10.38 Mean :25.79
## 3rd Qu.: 0.00 3rd Qu.:56.75
## Max. :89.00 Max. :89.00
## Lower_Secondary_End_Proficiency_Math Youth_15_24_Literacy_Rate_Male
## Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.00 1st Qu.: 0.0
## Median : 0.00 Median : 0.0
## Mean :24.45 Mean : 35.8
## 3rd Qu.:50.75 3rd Qu.: 94.0
## Max. :94.00 Max. :100.0
## Youth_15_24_Literacy_Rate_Female Birth_Rate
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.:10.36
## Median : 0.00 Median :17.55
## Mean : 35.08 Mean :18.91
## 3rd Qu.: 96.75 3rd Qu.:27.69
## Max. :100.00 Max. :46.08
## Gross_Primary_Education_Enrollment Gross_Tertiary_Education_Enrollment
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 97.20 1st Qu.: 9.00
## Median :101.85 Median : 24.85
## Mean : 94.94 Mean : 34.39
## 3rd Qu.:107.30 3rd Qu.: 59.98
## Max. :142.50 Max. :136.60
## Unemployment_Rate
## Min. : 0.000
## 1st Qu.: 2.303
## Median : 4.585
## Mean : 6.000
## 3rd Qu.: 8.655
## Max. :28.180
# Internal structure of the tibble, including the readr column spec attribute.
global_education %>% str()
## spc_tbl_ [202 × 29] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Countries and areas : chr [1:202] "Afghanistan" "Albania" "Algeria" "Andorra" ...
## $ Latitude : num [1:202] 33.9 41.2 28 42.5 11.2 ...
## $ Longitude : num [1:202] 67.71 20.17 1.66 1.52 17.87 ...
## $ OOSR_Pre0Primary_Age_Male : num [1:202] 0 4 0 0 31 14 14 2 52 13 ...
## $ OOSR_Pre0Primary_Age_Female : num [1:202] 0 2 0 0 39 0 4 2 50 14 ...
## $ OOSR_Primary_Age_Male : num [1:202] 0 6 0 0 0 0 4 0 9 0 ...
## $ OOSR_Primary_Age_Female : num [1:202] 0 3 0 0 0 0 1 0 9 0 ...
## $ OOSR_Lower_Secondary_Age_Male : num [1:202] 0 6 0 0 0 0 1 0 11 2 ...
## $ OOSR_Lower_Secondary_Age_Female : num [1:202] 0 1 0 0 0 0 2 0 9 3 ...
## $ OOSR_Upper_Secondary_Age_Male : num [1:202] 44 21 0 0 0 0 14 15 16 10 ...
## $ OOSR_Upper_Secondary_Age_Female : num [1:202] 69 15 0 0 0 0 12 7 4 6 ...
## $ Completion_Rate_Primary_Male : num [1:202] 67 94 93 0 63 0 0 91 99 0 ...
## $ Completion_Rate_Primary_Female : num [1:202] 40 96 93 0 57 0 0 94 99 0 ...
## $ Completion_Rate_Lower_Secondary_Male : num [1:202] 49 98 49 0 42 0 0 70 95 0 ...
## $ Completion_Rate_Lower_Secondary_Female : num [1:202] 26 97 65 0 32 0 0 79 99 0 ...
## $ Completion_Rate_Upper_Secondary_Male : num [1:202] 32 76 22 0 24 0 0 46 69 0 ...
## $ Completion_Rate_Upper_Secondary_Female : num [1:202] 14 80 37 0 15 0 0 53 79 0 ...
## $ Grade_2_3_Proficiency_Reading : num [1:202] 22 0 0 0 0 0 0 76 0 94 ...
## $ Grade_2_3_Proficiency_Math : num [1:202] 25 0 0 0 0 0 0 71 0 70 ...
## $ Primary_End_Proficiency_Reading : num [1:202] 13 0 0 0 0 0 0 46 0 0 ...
## $ Primary_End_Proficiency_Math : num [1:202] 11 0 0 0 0 0 0 56 55 64 ...
## $ Lower_Secondary_End_Proficiency_Reading: num [1:202] 0 48 21 0 0 0 0 48 0 80 ...
## $ Lower_Secondary_End_Proficiency_Math : num [1:202] 0 58 19 0 0 0 0 31 50 78 ...
## $ Youth_15_24_Literacy_Rate_Male : num [1:202] 74 99 98 0 0 0 0 99 0 0 ...
## $ Youth_15_24_Literacy_Rate_Female : num [1:202] 56 100 97 0 0 0 0 100 0 0 ...
## $ Birth_Rate : num [1:202] 32.5 11.8 24.3 7.2 40.7 ...
## $ Gross_Primary_Education_Enrollment : num [1:202] 104 107 110 106 114 ...
## $ Gross_Tertiary_Education_Enrollment : num [1:202] 9.7 55 51.4 0 9.3 ...
## $ Unemployment_Rate : num [1:202] 11.12 12.33 11.7 0 6.89 ...
## - attr(*, "spec")=
## .. cols(
## .. `Countries and areas` = col_character(),
## .. Latitude = col_double(),
## .. Longitude = col_double(),
## .. OOSR_Pre0Primary_Age_Male = col_double(),
## .. OOSR_Pre0Primary_Age_Female = col_double(),
## .. OOSR_Primary_Age_Male = col_double(),
## .. OOSR_Primary_Age_Female = col_double(),
## .. OOSR_Lower_Secondary_Age_Male = col_double(),
## .. OOSR_Lower_Secondary_Age_Female = col_double(),
## .. OOSR_Upper_Secondary_Age_Male = col_double(),
## .. OOSR_Upper_Secondary_Age_Female = col_double(),
## .. Completion_Rate_Primary_Male = col_double(),
## .. Completion_Rate_Primary_Female = col_double(),
## .. Completion_Rate_Lower_Secondary_Male = col_double(),
## .. Completion_Rate_Lower_Secondary_Female = col_double(),
## .. Completion_Rate_Upper_Secondary_Male = col_double(),
## .. Completion_Rate_Upper_Secondary_Female = col_double(),
## .. Grade_2_3_Proficiency_Reading = col_double(),
## .. Grade_2_3_Proficiency_Math = col_double(),
## .. Primary_End_Proficiency_Reading = col_double(),
## .. Primary_End_Proficiency_Math = col_double(),
## .. Lower_Secondary_End_Proficiency_Reading = col_double(),
## .. Lower_Secondary_End_Proficiency_Math = col_double(),
## .. Youth_15_24_Literacy_Rate_Male = col_double(),
## .. Youth_15_24_Literacy_Rate_Female = col_double(),
## .. Birth_Rate = col_double(),
## .. Gross_Primary_Education_Enrollment = col_double(),
## .. Gross_Tertiary_Education_Enrollment = col_double(),
## .. Unemployment_Rate = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Count the rows
dim(global_education)[1]
## [1] 202
# Count the columns
length(global_education)
## [1] 29
# Dimensions as (rows, columns)
c(nrow(global_education), ncol(global_education))
## [1] 202 29
## [1] 202 29
# Small preview subset: the first 10 rows of columns 2 through 10.
data1 <- global_education %>%
  slice_head(n = 10) %>%
  select(2:10)
data1
## # A tibble: 10 × 9
## Latitude Longitude OOSR_Pre0Primary_Age_Male OOSR_Pre0Primary_Age_Female
## <dbl> <dbl> <dbl> <dbl>
## 1 33.9 67.7 0 0
## 2 41.2 20.2 4 2
## 3 28.0 1.66 0 0
## 4 42.5 1.52 0 0
## 5 11.2 17.9 31 39
## 6 18.2 63.1 14 0
## 7 17.1 61.8 14 4
## 8 38.4 63.6 2 2
## 9 40.1 45.0 52 50
## 10 25.3 134. 13 14
## # ℹ 5 more variables: OOSR_Primary_Age_Male <dbl>,
## # OOSR_Primary_Age_Female <dbl>, OOSR_Lower_Secondary_Age_Male <dbl>,
## # OOSR_Lower_Secondary_Age_Female <dbl>, OOSR_Upper_Secondary_Age_Male <dbl>
# Countries whose gross primary enrollment is above 80 and at most 130,
# listed in ascending order of enrollment.
global_education %>%
  filter(
    Gross_Primary_Education_Enrollment > 80,
    Gross_Primary_Education_Enrollment <= 130
  ) %>%
  arrange(Gross_Primary_Education_Enrollment) %>%
  select(`Countries and areas`, Gross_Primary_Education_Enrollment)
## # A tibble: 174 × 2
## `Countries and areas` Gross_Primary_Education_Enrollment
## <chr> <dbl>
## 1 Senegal 81
## 2 The Bahamas 81.4
## 3 Jordan 81.5
## 4 Syria 81.7
## 5 Marshall Islands 84.7
## 6 Nigeria 84.7
## 7 Liberia 85.1
## 8 Romania 85.2
## 9 Chad 86.8
## 10 Turkmenistan 88.4
## # ℹ 164 more rows
# Out-of-school (dropout) rate (OOSR) per education level:
# take columns 4-11 and relabel them by level and sex.
OOSR <- global_education %>%
  select(4:11) %>%
  setNames(c(
    "Pra_Laki-Laki", "Pra_Perempuan",
    "SD_Laki-Laki", "SD_Perempuan",
    "SMP_Laki-Laki", "SMP_Perempuan",
    "SMA_Laki-Laki", "SMA_Perempuan"
  ))
OOSR
## # A tibble: 202 × 8
## `Pra_Laki-Laki` Pra_Perempuan `SD_Laki-Laki` SD_Perempuan `SMP_Laki-Laki`
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0 0 0 0
## 2 4 2 6 3 6
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 31 39 0 0 0
## 6 14 0 0 0 0
## 7 14 4 4 1 1
## 8 2 2 0 0 0
## 9 52 50 9 9 11
## 10 13 14 0 0 2
## # ℹ 192 more rows
## # ℹ 3 more variables: SMP_Perempuan <dbl>, `SMA_Laki-Laki` <dbl>,
## # SMA_Perempuan <dbl>
# Pearson correlation matrix of the OOSR columns
matriks_korelasi_person <- OOSR %>% cor(method = "pearson")
matriks_korelasi_person
## Pra_Laki-Laki Pra_Perempuan SD_Laki-Laki SD_Perempuan
## Pra_Laki-Laki 1.0000000 0.9841451 0.5419790 0.5786503
## Pra_Perempuan 0.9841451 1.0000000 0.5370577 0.5820723
## SD_Laki-Laki 0.5419790 0.5370577 1.0000000 0.9641599
## SD_Perempuan 0.5786503 0.5820723 0.9641599 1.0000000
## SMP_Laki-Laki 0.5346246 0.5318786 0.5609557 0.6607756
## SMP_Perempuan 0.5338757 0.5390811 0.5726859 0.6989913
## SMA_Laki-Laki 0.4750475 0.4565633 0.4065439 0.4836571
## SMA_Perempuan 0.4769198 0.4672795 0.4198253 0.5171418
## SMP_Laki-Laki SMP_Perempuan SMA_Laki-Laki SMA_Perempuan
## Pra_Laki-Laki 0.5346246 0.5338757 0.4750475 0.4769198
## Pra_Perempuan 0.5318786 0.5390811 0.4565633 0.4672795
## SD_Laki-Laki 0.5609557 0.5726859 0.4065439 0.4198253
## SD_Perempuan 0.6607756 0.6989913 0.4836571 0.5171418
## SMP_Laki-Laki 1.0000000 0.9660852 0.8075919 0.8084832
## SMP_Perempuan 0.9660852 1.0000000 0.7728536 0.8125554
## SMA_Laki-Laki 0.8075919 0.7728536 1.0000000 0.9650804
## SMA_Perempuan 0.8084832 0.8125554 0.9650804 1.0000000
# One box per OOSR column, each filled with its own rainbow colour.
boxplot(
  OOSR,
  col = rainbow(ncol(OOSR)),
  main = "Boxplot Rasio Angka Putus Sekolah"
)
Dari boxplot tersebut dapat kita ketahui bahwa rasio angka putus sekolah paling tinggi di tingkat prasekolah dan tingkat SMA.
# Completion-rate columns (positions 12-17), relabelled by level and sex.
completion <- global_education %>%
  select(12:17) %>%
  setNames(c(
    "SD_Laki-Laki", "SD_Perempuan",
    "SMP_Laki-Laki", "SMP_Perempuan",
    "SMA_Laki-Laki", "SMA_Perempuan"
  ))
completion
## # A tibble: 202 × 6
## `SD_Laki-Laki` SD_Perempuan `SMP_Laki-Laki` SMP_Perempuan `SMA_Laki-Laki`
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 67 40 49 26 32
## 2 94 96 98 97 76
## 3 93 93 49 65 22
## 4 0 0 0 0 0
## 5 63 57 42 32 24
## 6 0 0 0 0 0
## 7 0 0 0 0 0
## 8 91 94 70 79 46
## 9 99 99 95 99 69
## 10 0 0 0 0 0
## # ℹ 192 more rows
## # ℹ 1 more variable: SMA_Perempuan <dbl>
# One box per completion-rate column, each filled with its own rainbow colour.
boxplot(
  completion,
  col = rainbow(ncol(completion)),
  main = "Boxplot Rasio Angka Penyelesaian Sekolah"
)
Dari boxplot tersebut dapat kita ketahui bahwa angka penyelesaian sekolah paling tinggi di tingkat SD.
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.2
## corrplot 0.92 loaded
# Correlation matrix of the OOSR columns (cor() default method), used by corrplot below.
m <- OOSR %>% cor()
library(viridis)
## Warning: package 'viridis' was built under R version 4.3.2
## Loading required package: viridisLite
## Warning: package 'viridisLite' was built under R version 4.3.2
# Lower-triangle ellipse correlogram in the original variable order, with
# coefficients overlaid; coefficient and label colours come from viridis palettes.
corrplot(
  m,
  method = "ellipse",
  type = "lower",
  order = "original",
  addCoef.col = mako(22),
  tl.col = viridis(5)
)
library(corrplot)
# Keep only the two variables used in the regression below. Columns are
# selected by name rather than by position (26 and 29) so the subset stays
# correct if the column order of the CSV ever changes.
# NOTE(review): rows with Birth_Rate or Unemployment_Rate equal to 0 look
# like missing values encoded as 0 -- confirm, and consider excluding them
# before modelling.
data1 <- global_education %>%
  select(Birth_Rate, Unemployment_Rate)
data1
## # A tibble: 202 × 2
## Birth_Rate Unemployment_Rate
## <dbl> <dbl>
## 1 32.5 11.1
## 2 11.8 12.3
## 3 24.3 11.7
## 4 7.2 0
## 5 40.7 6.89
## 6 0 0
## 7 15.3 0
## 8 17.0 9.79
## 9 14.0 17.0
## 10 12.6 5.27
## # ℹ 192 more rows
library(ggplot2)
# Fit the simple linear regression that the plot below draws: unemployment
# rate (response, y-axis) regressed on birth rate (predictor, x-axis).
# The formula must match the plot's aesthetics, because
# geom_smooth(method = "lm") fits y ~ x; fitting the inverse regression
# (Birth_Rate ~ Unemployment_Rate) would give a slope and intercept that do
# not describe the drawn line.
model <- lm(Unemployment_Rate ~ Birth_Rate, data = data1)
# Scatter plot with the least-squares line overlaid
p <- ggplot(data1, aes(x = Birth_Rate, y = Unemployment_Rate)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "cyan") +
  labs(
    title = "Regresi Linier Sederhana",
    x = "Tingkat Kelahiran",
    y = "Tingkat Pengangguran"
  )
# Display the scatter plot
print(p)
## `geom_smooth()` using formula = 'y ~ x'
# Add a text label with the fitted slope and intercept.
# annotate() draws the label exactly once; geom_text() with constant
# x/y/label would draw one copy per data row, stacking identical labels on
# top of each other.
p + annotate(
  "text",
  x = 25,
  y = 25,
  label = paste(
    "Slope =", round(coef(model)[2], 2),
    "\nIntercept =", round(coef(model)[1], 2)
  ),
  colour = "blue",
  hjust = 0
)
## `geom_smooth()` using formula = 'y ~ x'
Berdasarkan scatter plot tersebut, terdapat korelasi positif antara tingkat kelahiran dan tingkat pengangguran, di mana semakin tinggi tingkat kelahiran, semakin tinggi pula tingkat pengangguran.
Fungsi summarize digunakan untuk merangkum banyak baris (amatan) menjadi satu baris; rangkuman ini bisa berupa mean, median, variance, atau sd (standar deviasi).
library(dplyr)
# Collapse Birth_Rate into a single row of summary statistics:
# mean, median, standard deviation and first quartile (NAs ignored).
data1 %>%
  summarise(
    mean = mean(Birth_Rate, na.rm = TRUE),
    median = median(Birth_Rate, na.rm = TRUE),
    sd = sd(Birth_Rate, na.rm = TRUE),
    q1 = quantile(Birth_Rate, probs = 0.25, na.rm = TRUE)
  )
## # A tibble: 1 × 4
## mean median sd q1
## <dbl> <dbl> <dbl> <dbl>
## 1 18.9 17.6 10.8 10.4