#1. Yumna Salma / G1401221004
#2. Ghonniyu Hibban Saputra / G1401221012
#3. Nabil Bintang Prayoga / G1401221017
#4. Muhammad Firlan Maulana / G1401221042
#5. Alista Sava Davina / G1401221046
#Load the tidyverse library and read the data
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the country dataset into a tibble and print a structural overview.
# The argument is spelled out in full (show_col_types); the shorter
# `show_col_type` only worked through partial argument matching.
# NOTE(review): the absolute path is specific to one machine - confirm whether
# a project-relative path should be used instead.
country_data <- read_csv(
  "C:/Users/nbint/Downloads/world-data-2023.csv",
  show_col_types = FALSE
)
glimpse(country_data)
## Rows: 195
## Columns: 35
## $ Country <chr> "Afghanistan", "Albania", …
## $ `Density\n(P/Km2)` <dbl> 60, 105, 18, 164, 26, 223,…
## $ Abbreviation <chr> "AF", "AL", "DZ", "AD", "A…
## $ `Agricultural Land( %)` <chr> "58.10%", "43.10%", "17.40…
## $ `Land Area(Km2)` <dbl> 652230, 28748, 2381741, 46…
## $ `Armed Forces size` <dbl> 323000, 9000, 317000, NA, …
## $ `Birth Rate` <dbl> 32.49, 11.78, 24.28, 7.20,…
## $ `Calling Code` <dbl> 93, 355, 213, 376, 244, 1,…
## $ `Capital/Major City` <chr> "Kabul", "Tirana", "Algier…
## $ `Co2-Emissions` <dbl> 8672, 4536, 150006, 469, 3…
## $ CPI <dbl> 149.90, 119.05, 151.36, NA…
## $ `CPI Change (%)` <chr> "2.30%", "1.40%", "2.00%",…
## $ `Currency-Code` <chr> "AFN", "ALL", "DZD", "EUR"…
## $ `Fertility Rate` <dbl> 4.47, 1.62, 3.02, 1.27, 5.…
## $ `Forested Area (%)` <chr> "2.10%", "28.10%", "0.80%"…
## $ `Gasoline Price` <chr> "$0.70", "$1.36", "$0.28",…
## $ GDP <chr> "$19,101,353,833", "$15,27…
## $ `Gross primary education enrollment (%)` <chr> "104.00%", "107.00%", "109…
## $ `Gross tertiary education enrollment (%)` <chr> "9.70%", "55.00%", "51.40%…
## $ `Infant mortality` <dbl> 47.9, 7.8, 20.1, 2.7, 51.6…
## $ `Largest city` <chr> "Kabul", "Tirana", "Algier…
## $ `Life expectancy` <dbl> 64.5, 78.5, 76.7, NA, 60.8…
## $ `Maternal mortality ratio` <dbl> 638, 15, 112, NA, 241, 42,…
## $ `Minimum wage` <chr> "$0.43", "$1.12", "$0.95",…
## $ `Official language` <chr> "Pashto", "Albanian", "Ara…
## $ `Out of pocket health expenditure` <chr> "78.40%", "56.90%", "28.10…
## $ `Physicians per thousand` <dbl> 0.28, 1.20, 1.72, 3.33, 0.…
## $ Population <dbl> 38041754, 2854191, 4305305…
## $ `Population: Labor force participation (%)` <chr> "48.90%", "55.70%", "41.20…
## $ `Tax revenue (%)` <chr> "9.30%", "18.60%", "37.20%…
## $ `Total tax rate` <chr> "71.40%", "36.60%", "66.10…
## $ `Unemployment rate` <chr> "11.12%", "12.33%", "11.70…
## $ Urban_population <dbl> 9797273, 1747593, 31510100…
## $ Latitude <dbl> 33.939110, 41.153332, 28.0…
## $ Longitude <dbl> 67.709953, 20.168331, 1.65…
#1
##Count the missing values (NA) in the dataset
# is.na() on the whole tibble yields a logical matrix; sum() counts its TRUE
# cells, giving one total for all columns together.
summarize(country_data, sum(is.na(country_data)))
## # A tibble: 1 × 1
## `sum(is.na(country_data))`
## <int>
## 1 337
#2
##Remove the rows that contain NA
# drop_na() without column arguments checks every column, so a row is kept
# only when all of its values are present. It replaces the superseded
# filter_all(all_vars(!is.na(.))) scoped verb.
country_data <- country_data %>%
  drop_na()
country_data
## # A tibble: 110 × 35
## Country `Density\n(P/Km2)` Abbreviation `Agricultural Land( %)`
## <chr> <dbl> <chr> <chr>
## 1 Afghanistan 60 AF 58.10%
## 2 Albania 105 AL 43.10%
## 3 Algeria 18 DZ 17.40%
## 4 Angola 26 AO 47.50%
## 5 Argentina 17 AR 54.30%
## 6 Armenia 104 AM 58.90%
## 7 Australia 3 AU 48.20%
## 8 Azerbaijan 123 AZ 57.70%
## 9 Bangladesh 1265 BD 70.60%
## 10 Barbados 668 BB 23.30%
## # ℹ 100 more rows
## # ℹ 31 more variables: `Land Area(Km2)` <dbl>, `Armed Forces size` <dbl>,
## # `Birth Rate` <dbl>, `Calling Code` <dbl>, `Capital/Major City` <chr>,
## # `Co2-Emissions` <dbl>, CPI <dbl>, `CPI Change (%)` <chr>,
## # `Currency-Code` <chr>, `Fertility Rate` <dbl>, `Forested Area (%)` <chr>,
## # `Gasoline Price` <chr>, GDP <chr>,
## # `Gross primary education enrollment (%)` <chr>, …
#3
##Convert dollar and percentage values stored as character into double

# Symbols that decorate numbers in the raw data: "$", "%" and ",".
symbol_pattern <- "[\\$%,]"

# Decide whether a column holds numbers written with decorating symbols.
#
# x: one column of the dataset.
# Returns TRUE only when x is a character vector, at least one value contains
# a symbol, and every non-missing value parses as a number once the symbols
# are stripped. Free-text columns that merely contain a comma therefore
# return FALSE and are left untouched instead of being turned into NA.
is_formatted_number <- function(x) {
  if (!is.character(x) || !any(grepl(symbol_pattern, x))) {
    return(FALSE)
  }
  stripped <- gsub(symbol_pattern, "", x[!is.na(x)])
  # A failed coercion is the signal being tested for, so its warning is
  # silenced for this single call only.
  !anyNA(suppressWarnings(as.numeric(stripped)))
}

# Only columns that pass the check are converted, so the conversion no longer
# raises "NAs introduced by coercion" warnings.
country_data <- country_data %>%
  mutate(across(
    where(is_formatted_number),
    ~ as.numeric(gsub(symbol_pattern, "", .x))
  ))
country_data
## # A tibble: 110 × 35
## Country `Density\n(P/Km2)` Abbreviation `Agricultural Land( %)`
## <chr> <dbl> <chr> <dbl>
## 1 Afghanistan 60 AF 58.1
## 2 Albania 105 AL 43.1
## 3 Algeria 18 DZ 17.4
## 4 Angola 26 AO 47.5
## 5 Argentina 17 AR 54.3
## 6 Armenia 104 AM 58.9
## 7 Australia 3 AU 48.2
## 8 Azerbaijan 123 AZ 57.7
## 9 Bangladesh 1265 BD 70.6
## 10 Barbados 668 BB 23.3
## # ℹ 100 more rows
## # ℹ 31 more variables: `Land Area(Km2)` <dbl>, `Armed Forces size` <dbl>,
## # `Birth Rate` <dbl>, `Calling Code` <dbl>, `Capital/Major City` <chr>,
## # `Co2-Emissions` <dbl>, CPI <dbl>, `CPI Change (%)` <dbl>,
## # `Currency-Code` <chr>, `Fertility Rate` <dbl>, `Forested Area (%)` <dbl>,
## # `Gasoline Price` <dbl>, GDP <dbl>,
## # `Gross primary education enrollment (%)` <dbl>, …
#4
##Convert the dollar amounts to rupiah
# Multiplier applied to every dollar-denominated column; kept in one place so
# the rate can be updated without editing each column separately.
usd_to_idr <- 15528.85

# Columns that hold dollar amounts and must be converted.
dollar_columns <- c("Gasoline Price", "GDP", "Minimum wage")

country_data <- country_data %>%
  mutate(across(all_of(dollar_columns), ~ .x * usd_to_idr))
country_data
## # A tibble: 110 × 35
## Country `Density\n(P/Km2)` Abbreviation `Agricultural Land( %)`
## <chr> <dbl> <chr> <dbl>
## 1 Afghanistan 60 AF 58.1
## 2 Albania 105 AL 43.1
## 3 Algeria 18 DZ 17.4
## 4 Angola 26 AO 47.5
## 5 Argentina 17 AR 54.3
## 6 Armenia 104 AM 58.9
## 7 Australia 3 AU 48.2
## 8 Azerbaijan 123 AZ 57.7
## 9 Bangladesh 1265 BD 70.6
## 10 Barbados 668 BB 23.3
## # ℹ 100 more rows
## # ℹ 31 more variables: `Land Area(Km2)` <dbl>, `Armed Forces size` <dbl>,
## # `Birth Rate` <dbl>, `Calling Code` <dbl>, `Capital/Major City` <chr>,
## # `Co2-Emissions` <dbl>, CPI <dbl>, `CPI Change (%)` <dbl>,
## # `Currency-Code` <chr>, `Fertility Rate` <dbl>, `Forested Area (%)` <dbl>,
## # `Gasoline Price` <dbl>, GDP <dbl>,
## # `Gross primary education enrollment (%)` <dbl>, …
#5
##Categorise the countries by unemployment rate
# case_when() takes the first condition that is TRUE, so each branch only
# needs its upper bound and no near-threshold constants are required:
#   rate < 1       -> "rich"
#   1 <= rate < 5  -> "developing"
#   rate >= 5      -> "poor"
# A missing rate matches no branch and therefore stays NA.
Kategori <- country_data %>%
  select(Country, `Unemployment rate`, GDP, CPI) %>%
  mutate(country_status = case_when(
    `Unemployment rate` < 1 ~ "rich",
    `Unemployment rate` < 5 ~ "developing",
    `Unemployment rate` >= 5 ~ "poor"
  ))
Kategori
## # A tibble: 110 × 5
## Country `Unemployment rate` GDP CPI country_status
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 Afghanistan 11.1 2.97e14 150. poor
## 2 Albania 12.3 2.37e14 119. poor
## 3 Algeria 11.7 2.64e15 151. poor
## 4 Angola 6.89 1.47e15 262. poor
## 5 Argentina 9.79 6.98e15 233. poor
## 6 Armenia 17.0 2.12e14 129. poor
## 7 Australia 5.27 2.16e16 120. poor
## 8 Azerbaijan 5.51 6.09e14 156. poor
## 9 Bangladesh 4.19 4.70e15 180. developing
## 10 Barbados 10.3 8.09e13 134. poor
## # ℹ 100 more rows
#6
##Mean and maximum of GDP and CPI, plus the number of countries, per status
# across() applies both statistics to both columns; the .names template puts
# the statistic first so the columns are called mean_GDP, max_GDP, mean_CPI
# and max_CPI, followed by the group size.
Rataan_Maks <- Kategori %>%
  group_by(country_status) %>%
  summarize(
    across(
      c(GDP, CPI),
      list(mean = mean, max = max),
      .names = "{.fn}_{.col}"
    ),
    count_countries = n()
  )
Rataan_Maks
## # A tibble: 3 × 6
## country_status mean_GDP max_GDP mean_CPI max_CPI count_countries
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 developing 1.27e16 3.09e17 145. 268. 46
## 2 poor 1.01e16 3.33e17 177. 1344. 61
## 3 rich 2.98e15 8.44e15 119. 136. 3
#7
##Reshape the result of number 6 into long format
# Every column except the grouping key is stacked: its name goes to
# `statistic` and its content to `value`.
Format_Long <- Rataan_Maks %>%
  pivot_longer(
    cols = !country_status,
    names_to = "statistic",
    values_to = "value"
  )
Format_Long
## # A tibble: 15 × 3
## country_status statistic value
## <chr> <chr> <dbl>
## 1 developing mean_GDP 1.27e16
## 2 developing max_GDP 3.09e17
## 3 developing mean_CPI 1.45e 2
## 4 developing max_CPI 2.68e 2
## 5 developing count_countries 4.6 e 1
## 6 poor mean_GDP 1.01e16
## 7 poor max_GDP 3.33e17
## 8 poor mean_CPI 1.77e 2
## 9 poor max_CPI 1.34e 3
## 10 poor count_countries 6.1 e 1
## 11 rich mean_GDP 2.98e15
## 12 rich max_GDP 8.44e15
## 13 rich mean_CPI 1.19e 2
## 14 rich max_CPI 1.36e 2
## 15 rich count_countries 3 e 0