Step 1. Import the data set
# readr supplies read_csv(), used below to load the data
library(readr)
# Read the country-level waste data from the working directory into `waste`
# (the import message below reports 217 rows and 51 columns)
waste <- read_csv("country_level_data_0.csv")
## Rows: 217 Columns: 51
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): iso3c, region_id, country_name, income_id, other_information_infor...
## dbl (41): gdp, composition_food_organic_waste_percent, composition_glass_per...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List the column names of the imported data
names(waste)
## [1] "iso3c"
## [2] "region_id"
## [3] "country_name"
## [4] "income_id"
## [5] "gdp"
## [6] "composition_food_organic_waste_percent"
## [7] "composition_glass_percent"
## [8] "composition_metal_percent"
## [9] "composition_other_percent"
## [10] "composition_paper_cardboard_percent"
## [11] "composition_plastic_percent"
## [12] "composition_rubber_leather_percent"
## [13] "composition_wood_percent"
## [14] "composition_yard_garden_green_waste_percent"
## [15] "other_information_information_system_for_solid_waste_management"
## [16] "other_information_national_agency_to_enforce_solid_waste_laws_and_regulations"
## [17] "other_information_national_law_governing_solid_waste_management_in_the_country"
## [18] "other_information_ppp_rules_and_regulations"
## [19] "other_information_summary_of_key_solid_waste_information_made_available_to_the_public"
## [20] "population_population_number_of_people"
## [21] "special_waste_agricultural_waste_tons_year"
## [22] "special_waste_construction_and_demolition_waste_tons_year"
## [23] "special_waste_e_waste_tons_year"
## [24] "special_waste_hazardous_waste_tons_year"
## [25] "special_waste_industrial_waste_tons_year"
## [26] "special_waste_medical_waste_tons_year"
## [27] "total_msw_total_msw_generated_tons_year"
## [28] "waste_collection_coverage_rural_percent_of_geographic_area"
## [29] "waste_collection_coverage_rural_percent_of_households"
## [30] "waste_collection_coverage_rural_percent_of_population"
## [31] "waste_collection_coverage_rural_percent_of_waste"
## [32] "waste_collection_coverage_total_percent_of_geographic_area"
## [33] "waste_collection_coverage_total_percent_of_households"
## [34] "waste_collection_coverage_total_percent_of_population"
## [35] "waste_collection_coverage_total_percent_of_waste"
## [36] "waste_collection_coverage_urban_percent_of_geographic_area"
## [37] "waste_collection_coverage_urban_percent_of_households"
## [38] "waste_collection_coverage_urban_percent_of_population"
## [39] "waste_collection_coverage_urban_percent_of_waste"
## [40] "waste_treatment_anaerobic_digestion_percent"
## [41] "waste_treatment_compost_percent"
## [42] "waste_treatment_controlled_landfill_percent"
## [43] "waste_treatment_incineration_percent"
## [44] "waste_treatment_landfill_unspecified_percent"
## [45] "waste_treatment_open_dump_percent"
## [46] "waste_treatment_other_percent"
## [47] "waste_treatment_recycling_percent"
## [48] "waste_treatment_sanitary_landfill_landfill_gas_system_percent"
## [49] "waste_treatment_unaccounted_for_percent"
## [50] "waste_treatment_waterways_marine_percent"
## [51] "where_where_is_this_data_measured"
Step 2. Understand the dimensions of the data and see what the
variables are
# Dimensions of the data: number of rows, then number of columns
dim(waste)
## [1] 217 51
# Names of all variables (columns)
names(waste)
## [1] "iso3c"
## [2] "region_id"
## [3] "country_name"
## [4] "income_id"
## [5] "gdp"
## [6] "composition_food_organic_waste_percent"
## [7] "composition_glass_percent"
## [8] "composition_metal_percent"
## [9] "composition_other_percent"
## [10] "composition_paper_cardboard_percent"
## [11] "composition_plastic_percent"
## [12] "composition_rubber_leather_percent"
## [13] "composition_wood_percent"
## [14] "composition_yard_garden_green_waste_percent"
## [15] "other_information_information_system_for_solid_waste_management"
## [16] "other_information_national_agency_to_enforce_solid_waste_laws_and_regulations"
## [17] "other_information_national_law_governing_solid_waste_management_in_the_country"
## [18] "other_information_ppp_rules_and_regulations"
## [19] "other_information_summary_of_key_solid_waste_information_made_available_to_the_public"
## [20] "population_population_number_of_people"
## [21] "special_waste_agricultural_waste_tons_year"
## [22] "special_waste_construction_and_demolition_waste_tons_year"
## [23] "special_waste_e_waste_tons_year"
## [24] "special_waste_hazardous_waste_tons_year"
## [25] "special_waste_industrial_waste_tons_year"
## [26] "special_waste_medical_waste_tons_year"
## [27] "total_msw_total_msw_generated_tons_year"
## [28] "waste_collection_coverage_rural_percent_of_geographic_area"
## [29] "waste_collection_coverage_rural_percent_of_households"
## [30] "waste_collection_coverage_rural_percent_of_population"
## [31] "waste_collection_coverage_rural_percent_of_waste"
## [32] "waste_collection_coverage_total_percent_of_geographic_area"
## [33] "waste_collection_coverage_total_percent_of_households"
## [34] "waste_collection_coverage_total_percent_of_population"
## [35] "waste_collection_coverage_total_percent_of_waste"
## [36] "waste_collection_coverage_urban_percent_of_geographic_area"
## [37] "waste_collection_coverage_urban_percent_of_households"
## [38] "waste_collection_coverage_urban_percent_of_population"
## [39] "waste_collection_coverage_urban_percent_of_waste"
## [40] "waste_treatment_anaerobic_digestion_percent"
## [41] "waste_treatment_compost_percent"
## [42] "waste_treatment_controlled_landfill_percent"
## [43] "waste_treatment_incineration_percent"
## [44] "waste_treatment_landfill_unspecified_percent"
## [45] "waste_treatment_open_dump_percent"
## [46] "waste_treatment_other_percent"
## [47] "waste_treatment_recycling_percent"
## [48] "waste_treatment_sanitary_landfill_landfill_gas_system_percent"
## [49] "waste_treatment_unaccounted_for_percent"
## [50] "waste_treatment_waterways_marine_percent"
## [51] "where_where_is_this_data_measured"
Step 4. Clean up numbers
# Confirm that the percent columns are numeric before doing arithmetic on them
class(waste_1$`metal percent`)
## [1] "numeric"
class(waste_1$`glass percent`)
## [1] "numeric"
# Convert the percent columns from the 0-100 scale to proportions (0-1).
waste_1$`food organic waste percent` <- waste_1$`food organic waste percent` / 100
waste_1$`glass percent` <- waste_1$`glass percent` / 100
# Store the result: the quotient used to be printed without being assigned,
# so the metal column stayed on the 0-100 scale while the other percent
# columns were converted to proportions.
waste_1$`metal percent` <- waste_1$`metal percent` / 100
# Display the converted column to check the values.
waste_1$`metal percent`
## [1] NA NA 0.04400000 0.04800000 0.02600000 0.03000000
## [7] 0.01840000 0.03400000 0.07900000 0.07000000 0.19380000 0.04400000
## [13] 0.03490000 0.02100000 0.01910000 0.01600000 0.01000000 0.00500000
## [19] 0.01980000 0.02100000 0.07000000 NA 0.02000000 0.05000000
## [25] 0.06000000 0.02501931 0.02900000 0.04900000 0.04000000 0.01000000
## [31] 0.00100000 NA 0.13000000 0.03000000 NA 0.02300000
## [37] 0.01100000 NA 0.00700000 NA NA 0.01100000
## [43] 0.04000000 NA 0.02000000 0.01600000 NA 0.06200000
## [49] 0.03250000 0.08400000 0.01400000 NA 0.05000000 0.06290000
## [55] 0.03000000 0.02800000 0.01530000 0.02000000 NA 0.03000000
## [61] 0.02600000 0.01200000 0.01700000 0.01900000 0.03000000 NA
## [67] 0.16730000 NA 0.03500000 0.05000000 0.01000000 0.04370000
## [73] 0.01000000 0.02100000 NA NA 0.03000000 0.02400000
## [79] 0.02000000 NA 0.09000000 0.04200000 0.02000000 0.02110000
## [85] 0.02100000 0.02500000 0.01800000 0.01800000 NA NA
## [91] 0.03700000 0.02300000 0.00980000 NA 0.03000000 0.04300000
## [97] 0.02380000 0.01500000 0.04000000 0.03000000 0.01000000 NA
## [103] 0.02000000 0.10000000 0.08800000 0.02600000 0.02000000 0.03700000
## [109] 0.05500000 NA 0.04300000 0.05000000 0.12500000 0.03000000
## [115] NA 0.02750000 0.02000000 0.02600000 0.01700000 NA
## [121] 0.02500000 0.02200000 0.03000000 NA 0.01000000 0.03400000
## [127] 0.06000000 0.02500000 0.03500000 0.04000000 NA 0.02900000
## [133] NA 0.10000000 NA 0.04200000 NA NA
## [139] 0.03000000 NA 0.10000000 0.01000000 NA NA
## [145] 0.04300000 0.06120000 0.01900000 0.10000000 0.04500000 NA
## [151] 0.04000000 0.03800000 0.02640000 0.04220000 0.10000000 NA
## [157] 0.02170000 0.10400000 0.01850000 NA 0.02700000 0.03000000
## [163] 0.09000000 0.02500000 0.01830000 NA 0.05000000 0.01700000
## [169] 0.02000000 0.19120000 0.13150000 NA 0.02000000 0.00900000
## [175] NA 0.01380000 NA NA 0.03000000 0.02000000
## [181] NA 0.09200000 NA 0.07000000 0.04600000 0.04000000
## [187] 0.07600000 0.02000000 0.04000000 0.01850000 NA 0.11440000
## [193] 0.10000000 0.09000000 0.03740000 0.04000000 0.01000000 0.10000000
## [199] 0.03070000 NA 0.00600000 0.04000000 0.05000000 0.09000000
## [205] 0.02000000 0.03800000 NA 0.08600000 0.07000000 0.00580000
## [211] 0.03800000 0.08800000 0.06000000 0.06000000 0.16910461 NA
## [217] 0.06000000
# Convert the "other" share from the 0-100 scale to a proportion
waste_1[["other percent"]] <- waste_1[["other percent"]] / 100
# round the percentages to two decimal places (see the round(x, 2) calls below);
# the GDP column is displayed first for inspection
waste_1$`gdp`
## [1] 35563.3125 2057.0623 8036.6904 13724.0586 43711.8008 67119.1328
## [7] 23550.0996 11019.8389 11113.4424 17965.5020 47784.1797 56029.6133
## [13] 14853.9102 839.7785 51914.9141 2227.3523 1925.3312 3195.7371
## [19] 22279.3613 47937.7773 35400.0820 12671.0078 18307.5195 7259.0244
## [25] 80982.3672 7984.4136 14596.2461 15444.7461 60865.9883 6743.0161
## [31] 14126.3896 822.6105 47672.0781 68394.2891 46672.5938 20361.7148
## [37] 16092.3008 3660.9028 3262.7637 1055.5730 4899.5796 12523.0068
## [43] 2959.5400 6353.7832 18168.7148 12984.7109 27503.7891 66207.4453
## [49] 39544.6836 40836.3281 53784.7812 6596.7754 11708.6396 57821.1992
## [55] 15328.3525 11826.1650 11896.3770 10301.1074 1714.5524 40985.5586
## [61] 36956.2852 1779.0840 48814.3086 10787.6855 46110.1836 44402.8789
## [67] 3440.0642 18514.6523 46290.1211 12605.1396 3092.5969 43711.8008
## [73] 1622.7769 2180.8557 1800.0686 24827.0723 30465.4492 13207.7119
## [79] 43948.5625 8125.3647 59074.9414 9812.0898 57215.9688 5395.6138
## [85] 28829.3555 2952.6187 32643.4902 10531.1982 44203.6211 6496.8076
## [91] 83388.7188 14535.8623 10310.6797 55274.1875 37687.7461 42419.7227
## [97] 9550.8105 10413.1758 41309.9570 22702.5801 3329.8545 4805.1411
## [103] 3364.2788 2249.7686 25569.3574 42105.2812 58810.3008 6543.6670
## [109] 16966.7402 1333.4222 8479.9463 14029.9824 45726.9883 12287.4727
## [115] 1978.6084 37278.3047 114323.4062 30982.1738 117335.5859 30385.8945
## [121] 6915.1030 43711.8008 10360.8301 1565.8669 17284.6094 19332.0742
## [127] 3628.8953 16147.7295 2007.8060 43707.5391 1093.7881 20752.6211
## [133] 10940.4150 60955.7266 1217.0902 4783.6357 20646.9336 999.4413
## [139] 23906.2656 6152.9873 57330.2031 1038.2241 4690.3794 4611.8403
## [145] 56849.3750 64962.2617 2902.2493 11167.3193 41857.4766 30535.8340
## [151] 4571.4146 28436.3594 11877.0840 7705.0454 18275.0254 3912.3811
## [157] 33221.5430 34311.0312 34961.9453 11810.4277 5985.6831 60955.7266
## [163] 96261.6641 29983.5566 26012.8496 1950.5281 48921.2305 4191.8232
## [169] 3067.6626 97341.4688 2596.4529 1237.6302 7328.7134 58806.1992
## [175] 1862.6141 18351.2734 1795.9919 3720.7869 16953.8027 31965.7617
## [181] 39037.9336 52608.7109 8321.1064 NA 23302.9531 8587.0508
## [187] 28174.4824 1732.6913 1404.1329 16301.5381 2616.4756 11471.1865
## [193] 3344.5737 5636.4185 28910.9082 10505.2627 28289.0879 3793.1277
## [199] 57362.9258 2128.5540 1972.3018 11534.6377 20588.3926 61498.3711
## [205] 5164.2744 11971.9932 14269.6738 24215.8828 30436.5645 5089.4111
## [211] 3061.7874 6210.9834 9723.5615 8269.6719 12666.6074 3201.2893
## [217] 3190.5066
# Keep two decimal places for the food organic waste share, then display it
waste_1[["food organic waste percent"]] <- round(
  waste_1[["food organic waste percent"]],
  digits = 2
)
waste_1[["food organic waste percent"]]
## [1] NA NA 0.52 0.51 0.31 0.39 0.39 0.57 0.20 0.46 0.48 0.31 0.45 0.81 0.14
## [16] 0.52 0.21 0.81 0.24 0.59 0.46 NA 0.30 0.47 0.17 0.55 0.51 0.18 0.36 0.58
## [31] 0.08 NA 0.24 0.29 NA 0.53 0.61 NA 0.83 NA NA 0.60 0.50 NA 0.58
## [46] 0.69 NA 0.11 0.41 0.62 0.30 NA 0.45 0.13 0.51 0.54 0.59 0.56 NA 0.49
## [61] 0.37 0.88 0.36 0.33 0.32 NA 0.24 NA 0.17 0.39 0.67 0.25 0.58 0.35 NA
## [76] NA 0.40 0.27 0.43 0.38 0.27 0.50 0.35 0.58 0.31 0.61 0.22 0.54 NA NA
## [91] 0.17 0.73 0.69 0.10 0.34 0.34 0.62 0.50 0.36 0.30 0.57 NA 0.67 0.44 0.27
## [106] 0.30 0.45 0.17 0.52 NA 0.70 0.45 0.38 0.62 NA 0.40 0.30 0.47 0.03 NA
## [121] 0.60 NA 0.55 NA 0.24 0.52 0.08 0.29 0.17 0.52 NA 0.34 0.23 0.44 0.60
## [136] 0.05 0.64 NA 0.46 NA 0.44 0.38 NA NA 0.35 0.15 0.52 0.44 0.28 0.27
## [151] 0.30 0.46 0.50 0.52 0.26 0.31 0.37 0.13 0.36 NA 0.59 0.47 0.57 0.56 0.28
## [166] NA 0.46 0.51 0.12 0.11 0.43 NA 0.65 0.05 NA 0.38 NA NA 0.57 0.42
## [181] NA 0.23 NA 0.46 0.48 0.57 0.22 0.70 0.38 0.66 NA 0.16 0.44 0.51 0.27
## [196] 0.68 0.64 0.44 0.23 NA 0.74 0.37 0.54 0.15 0.40 0.50 NA 0.06 0.46 0.62
## [211] 0.73 0.43 0.42 0.65 0.16 NA 0.36
# Keep two decimal places for the glass share, then display it
waste_1[["glass percent"]] <- round(
  waste_1[["glass percent"]],
  digits = 2
)
waste_1[["glass percent"]]
## [1] NA NA 0.07 0.04 0.08 0.04 0.03 0.03 0.03 0.07 0.04 0.07 0.05 0.03 0.03
## [16] 0.02 0.01 0.00 0.06 0.03 0.07 NA 0.08 0.08 0.09 0.03 0.02 0.04 0.03 0.04
## [31] NA NA 0.06 0.04 NA 0.07 0.02 NA 0.00 NA NA 0.02 0.02 NA 0.01
## [46] 0.05 NA 0.04 0.03 0.02 0.10 NA 0.08 0.06 0.06 0.01 0.03 0.04 NA 0.08
## [61] 0.08 0.01 0.09 0.04 0.10 NA 0.07 NA 0.02 0.03 0.00 0.04 0.01 0.02 NA
## [76] NA 0.03 0.03 0.07 0.04 0.05 0.05 0.03 0.01 0.04 0.02 0.04 0.03 NA NA
## [91] 0.09 0.02 0.02 NA 0.03 0.08 0.03 0.02 0.05 0.05 0.03 NA 0.03 0.06 0.08
## [106] 0.03 0.04 0.09 0.04 NA 0.03 0.07 0.05 0.02 NA 0.09 0.04 0.21 0.03 NA
## [121] 0.02 0.03 0.04 NA NA 0.06 0.02 0.04 0.01 0.06 NA 0.09 NA 0.06 NA
## [136] 0.04 0.03 NA 0.03 NA 0.06 NA 0.03 NA 0.05 0.05 0.04 0.06 0.04 0.06
## [151] 0.06 0.06 0.03 0.02 0.05 NA 0.10 0.02 0.07 NA 0.02 0.03 0.04 0.05 0.17
## [166] NA 0.04 0.04 0.02 0.01 0.07 NA 0.04 0.06 NA 0.05 NA NA 0.03 0.19
## [181] NA 0.09 NA 0.07 0.05 0.03 0.20 0.01 0.02 0.03 NA 0.05 0.06 0.06 0.10
## [196] 0.02 0.02 0.06 0.04 NA 0.01 0.05 0.03 0.04 0.03 0.06 NA 0.18 0.07 0.02
## [211] 0.05 0.02 0.06 0.01 0.05 NA 0.05
# Keep two decimal places for the metal share, then display it
waste_1[["metal percent"]] <- round(
  waste_1[["metal percent"]],
  digits = 2
)
waste_1[["metal percent"]]
## [1] NA NA 4.40 4.80 2.60 3.00 1.84 3.40 7.90 7.00 19.38 4.40
## [13] 3.49 2.10 1.91 1.60 1.00 0.50 1.98 2.10 7.00 NA 2.00 5.00
## [25] 6.00 2.50 2.90 4.90 4.00 1.00 0.10 NA 13.00 3.00 NA 2.30
## [37] 1.10 NA 0.70 NA NA 1.10 4.00 NA 2.00 1.60 NA 6.20
## [49] 3.25 8.40 1.40 NA 5.00 6.29 3.00 2.80 1.53 2.00 NA 3.00
## [61] 2.60 1.20 1.70 1.90 3.00 NA 16.73 NA 3.50 5.00 1.00 4.37
## [73] 1.00 2.10 NA NA 3.00 2.40 2.00 NA 9.00 4.20 2.00 2.11
## [85] 2.10 2.50 1.80 1.80 NA NA 3.70 2.30 0.98 NA 3.00 4.30
## [97] 2.38 1.50 4.00 3.00 1.00 NA 2.00 10.00 8.80 2.60 2.00 3.70
## [109] 5.50 NA 4.30 5.00 12.50 3.00 NA 2.75 2.00 2.60 1.70 NA
## [121] 2.50 2.20 3.00 NA 1.00 3.40 6.00 2.50 3.50 4.00 NA 2.90
## [133] NA 10.00 NA 4.20 NA NA 3.00 NA 10.00 1.00 NA NA
## [145] 4.30 6.12 1.90 10.00 4.50 NA 4.00 3.80 2.64 4.22 10.00 NA
## [157] 2.17 10.40 1.85 NA 2.70 3.00 9.00 2.50 1.83 NA 5.00 1.70
## [169] 2.00 19.12 13.15 NA 2.00 0.90 NA 1.38 NA NA 3.00 2.00
## [181] NA 9.20 NA 7.00 4.60 4.00 7.60 2.00 4.00 1.85 NA 11.44
## [193] 10.00 9.00 3.74 4.00 1.00 10.00 3.07 NA 0.60 4.00 5.00 9.00
## [205] 2.00 3.80 NA 8.60 7.00 0.58 3.80 8.80 6.00 6.00 16.91 NA
## [217] 6.00
# Keep two decimal places for the "other" share, then display it
waste_1[["other percent"]] <- round(
  waste_1[["other percent"]],
  digits = 2
)
waste_1[["other percent"]]
## [1] NA NA 0.12 0.15 0.12 0.10 0.15 0.17 0.26 0.12 0.03 0.18 0.14 0.03 0.40
## [16] 0.34 0.61 0.09 0.18 0.15 0.12 0.93 0.35 0.05 0.26 0.23 0.17 0.06 0.15 0.07
## [31] 0.92 NA 0.08 0.29 NA 0.16 0.13 NA 0.10 NA NA 0.16 0.22 NA 0.07
## [46] 0.03 NA 0.11 0.10 0.15 0.18 NA 0.14 0.24 0.10 0.01 0.14 0.15 NA 0.14
## [61] 0.16 0.04 0.13 0.04 0.26 NA 0.09 NA 0.28 0.16 0.25 0.14 0.27 0.51 NA
## [76] NA 0.09 0.36 0.33 0.18 0.09 0.12 0.14 0.07 0.12 0.17 0.46 0.13 NA NA
## [91] 0.26 0.05 0.06 NA 0.16 0.15 0.05 0.16 0.06 0.07 0.09 NA 0.10 0.09 0.12
## [106] NA 0.08 0.20 0.11 NA 0.04 0.11 0.09 0.13 NA 0.44 0.27 0.10 0.03 NA
## [121] 0.06 NA 0.23 NA 0.01 0.12 0.31 0.20 0.72 0.10 NA 0.19 0.50 0.09 0.15
## [136] 0.60 0.08 NA 0.09 NA 0.09 0.57 0.11 NA 0.20 0.13 0.07 0.09 0.37 0.24
## [151] 0.20 0.06 0.22 0.22 0.09 0.42 0.27 0.24 0.28 NA 0.12 0.29 0.05 0.11 0.19
## [166] NA 0.03 0.19 0.68 0.32 0.04 NA 0.07 0.66 NA 0.17 NA NA 0.24 0.01
## [181] NA NA NA 0.12 0.26 0.22 0.02 0.18 0.42 0.05 NA 0.63 0.09 0.09 0.17
## [196] 0.05 0.24 0.09 0.14 NA 0.11 0.22 0.07 0.03 0.17 0.10 NA 0.05 0.12 0.15
## [211] 0.06 0.19 0.20 0.06 0.45 NA 0.03
Step 5. Create scatter plots for gdp against other variables to see
whether there is a correlation.
# Scatter plot: GDP (x-axis) against the food organic waste share (y-axis)
plot(
  x = waste_1$gdp,
  y = waste_1$`food organic waste percent`,
  main = "GDP vs waste food organic percent",
  xlab = "GDP",
  ylab = "waste food organic percent",
  col = "#00D63E",
  frame.plot = FALSE
)

# Observation: overall, the lower the GDP for a certain country, the higher the waste food organic percent is. The two have a negative correlation.
# Scatter plot: GDP (x-axis, limited to 0-120000) against the glass share
plot(
  x = waste_1$gdp,
  y = waste_1$`glass percent`,
  main = "GDP vs glass percent",
  xlab = "GDP",
  ylab = "glass percent",
  xlim = c(0, 120000),
  col = "#03A3A8",
  frame.plot = FALSE
)

# Observation: it is harder to tell the correlation between the two variables, but countries with lower GDP appear to have low glass percent. Glass percent overall takes a very small part of the total waste produced by the country.
# Scatter plot: GDP (x-axis, limited to 0-120000) against the metal share
plot(
  x = waste_1$gdp,
  y = waste_1$`metal percent`,
  main = "GDP vs metal percent",
  xlab = "GDP",
  ylab = "metal percent",
  xlim = c(0, 120000),
  col = "#0090CE",
  frame.plot = FALSE
)

# Observation: similar to the glass percent graph, countries with lower GDP appear to have low metal percent
# Scatter plot: GDP (x-axis, limited to 0-120000) against the "other" share
# (y-axis, limited to 0-1)
plot(
  x = waste_1$gdp,
  y = waste_1$`other percent`,
  main = "GDP vs other percent",
  xlab = "GDP",
  ylab = "other percent",
  xlim = c(0, 120000),
  ylim = c(0, 1),
  col = "#5400CE",
  frame.plot = FALSE
)

# Observation: similar to the graphs above, countries with lower GDP appear to have low other percent