This notebook attemtps to clean the municipality data in the list of Bancnet ATMs in the Philippines by fuzzy merging with a clean list of municipalities.
Read in data. The clean list of provinces and municipalities comes from a random github user but it appears to be good.
bn <- read_excel(file.path(data_path, "BancNet ATMs as of March 31, 2020.xls"))
colnames(bn) <- tolower(colnames(bn))
git_repo <- "https://raw.githubusercontent.com/clavearnel/philippines-region-province-citymun-brgy/master/csv/"
muni <- read_csv(paste0(git_repo, "refcitymun.csv"))
prov <- read_csv(paste0(git_repo, "refprovince.csv"))
region <- read_csv(paste0(git_repo, "refregion.csv"))
Look at provinces from the bancnet data which donโt match with provinces from the other source.
bn %>%
mutate(cityprov = str_trim(toupper(cityprov))) %>%
distinct(cityprov) %>%
anti_join(prov, by = c("cityprov" = "provDesc"))
# Merge muni with prov to get province name and clean up the province and muni names
# by removing white space and capitalizing
muni_m <- left_join(muni, prov, by = 'provCode') %>%
mutate(provDesc= str_trim(toupper(provDesc)),
citymunDesc = str_trim(toupper(citymunDesc)))
# Generate unique list of muni names from the bancnet data
bn_muni <- bn %>% select(region, cityprov, district) %>% distinct()
# fuzzy match the bancnet data to the other list
merged <- stringdist_inner_join(bn_muni, muni_m, by = c("cityprov" = "provDesc", "district" = "citymunDesc"))
# Drop rows where the bancnet info matches the other source exactly
merged <- merged %>% filter((cityprov != provDesc) |( district !=citymunDesc))
# Generate new columns based on alphabetic order and then drop pairs of rows
# where A is mapped to B and B to A.
merged <- merged %>%
mutate(first = ifelse(district < citymunDesc, district, citymunDesc),
second = ifelse(district > citymunDesc, district, citymunDesc))
merged <- merged %>% distinct(cityprov, first, second, .keep_all = TRUE)
# Drop the temp columns
merged <- merged %>% select(-first, -second)
write_csv(merged, file.path(data_path, "candidate_muni_matches.csv"))
LS0tDQp0aXRsZTogIkNsZWFuIEJhbmNuZXQgQVRNIG11bmljaXBhbGl0eSBuYW1lIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KVGhpcyBub3RlYm9vayBhdHRlbXRwcyB0byBjbGVhbiB0aGUgbXVuaWNpcGFsaXR5IGRhdGEgaW4gdGhlIGxpc3Qgb2YgQmFuY25ldCBBVE1zIGluIHRoZSBQaGlsaXBwaW5lcyBieSBmdXp6eSBtZXJnaW5nIHdpdGggYSBjbGVhbiBsaXN0IG9mIG11bmljaXBhbGl0aWVzLg0KDQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRSwgbWVzc2FnZT1GQUxTRX0NCmxpYnJhcnkodGlkeXZlcnNlKTsgbGlicmFyeShyZWFkeGwpOyBsaWJyYXJ5KGZ1enp5am9pbikNCmRhdGFfcGF0aCA8LSAiQzovVXNlcnMvZG91Z2ovRG9jdW1lbnRzL0RhdGEvV0IiDQpgYGANCg0KUmVhZCBpbiBkYXRhLiBUaGUgY2xlYW4gbGlzdCBvZiBwcm92aW5jZXMgYW5kIG11bmljaXBhbGl0aWVzIGNvbWVzIGZyb20gYSByYW5kb20gZ2l0aHViIHVzZXIgYnV0IGl0IGFwcGVhcnMgdG8gYmUgZ29vZC4gDQpgYGB7ciwgbWVzc2FnZT1GQUxTRX0NCmJuIDwtIHJlYWRfZXhjZWwoZmlsZS5wYXRoKGRhdGFfcGF0aCwgIkJhbmNOZXQgQVRNcyBhcyBvZiBNYXJjaCAzMSwgMjAyMC54bHMiKSkNCmNvbG5hbWVzKGJuKSA8LSB0b2xvd2VyKGNvbG5hbWVzKGJuKSkNCg0KZ2l0X3JlcG8gPC0gImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9jbGF2ZWFybmVsL3BoaWxpcHBpbmVzLXJlZ2lvbi1wcm92aW5jZS1jaXR5bXVuLWJyZ3kvbWFzdGVyL2Nzdi8iDQoNCm11bmkgPC0gcmVhZF9jc3YocGFzdGUwKGdpdF9yZXBvLCAicmVmY2l0eW11bi5jc3YiKSkNCnByb3YgPC0gcmVhZF9jc3YocGFzdGUwKGdpdF9yZXBvLCAicmVmcHJvdmluY2UuY3N2IikpDQpyZWdpb24gPC0gcmVhZF9jc3YocGFzdGUwKGdpdF9yZXBvLCAicmVmcmVnaW9uLmNzdiIpKQ0KYGBgDQoNCkxvb2sgYXQgcHJvdmluY2VzIGZyb20gdGhlIGJhbmNuZXQgZGF0YSB3aGljaCBkb24ndCBtYXRjaCB3aXRoIHByb3ZpbmNlcyBmcm9tIHRoZSBvdGhlciBzb3VyY2UuDQpgYGB7cn0NCmJuICU+JSANCiAgbXV0YXRlKGNpdHlwcm92ID0gc3RyX3RyaW0odG91cHBlcihjaXR5cHJvdikpKSAlPiUgDQogIGRpc3RpbmN0KGNpdHlwcm92KSAlPiUgDQogIGFudGlfam9pbihwcm92LCBieSA9IGMoImNpdHlwcm92IiA9ICJwcm92RGVzYyIpKQ0KYGBgDQoNCg0KYGBge3J9DQojIE1lcmdlIG11bmkgd2l0aCBwcm92IHRvIGdldCBwcm92aW5jZSBuYW1lIGFuZCBjbGVhbiB1cCB0aGUgcHJvdmluY2UgYW5kIG11bmkgbmFtZXMNCiMgYnkgcmVtb3Zpbmcgd2hpdGUgc3BhY2UgYW5kIGNhcGl0YWxpemluZw0KbXVuaV9tIDwtIGxlZnRfam9pbihtdW5pLCBwcm92LCBieSA9ICdwcm92Q29kZScpICU+JSANCiAgbXV0YXRlKHByb3ZEZXNjPSBzdHJfdHJpbSh0b3VwcGVyKHByb3ZEZXNjKSksIA0KICAgICAgICAgY2l0eW11bkRlc2MgPSBzdHJfdHJpbSh0b3VwcGVyKGNpdHltdW5EZXNjKSkpDQoNCiMgR2VuZXJhdGUgdW5pcXVlIGxpc3Qgb2YgbXVuaSBuYW1lcyBmcm9tIHRoZSBiYW5jbmV0IGRhdGENCmJuX211bmkgPC0gYm4gJT4lIHNlbGVjdChyZWdpb24sIGNpdHlwcm92LCBkaXN0cmljdCkgJT4lIGRpc3RpbmN0KCkNCg0KIyBmdXp6eSBtYXRjaCB0aGUgYmFuY25ldCBkYXRhIHRvIHRoZSBvdGhlciBsaXN0DQptZXJnZWQgPC0gc3RyaW5nZGlzdF9pbm5lcl9qb2luKGJuX211bmksIG11bmlfbSwgIGJ5ID0gYygiY2l0eXByb3YiID0gInByb3ZEZXNjIiwgICJkaXN0cmljdCIgPSAiY2l0eW11bkRlc2MiKSkNCg0KIyBEcm9wIHJvd3Mgd2hlcmUgdGhlIGJhbmNuZXQgaW5mbyBtYXRjaGVzIHRoZSBvdGhlciBzb3VyY2UgZXhhY3RseQ0KbWVyZ2VkIDwtIG1lcmdlZCAlPiUgZmlsdGVyKChjaXR5cHJvdiAhPSBwcm92RGVzYykgIHwoIGRpc3RyaWN0ICE9Y2l0eW11bkRlc2MpKQ0KDQojIEdlbmVyYXRlIG5ldyBjb2x1bW5zIGJhc2VkIG9uIGFscGhhYmV0aWMgb3JkZXIgYW5kIHRoZW4gZHJvcCBwYWlycyBvZiByb3dzDQojIHdoZXJlIEEgaXMgbWFwcGVkIHRvIEIgYW5kIEIgdG8gQS4gDQptZXJnZWQgPC0gbWVyZ2VkICU+JSANCiAgbXV0YXRlKGZpcnN0ID0gaWZlbHNlKGRpc3RyaWN0IDwgY2l0eW11bkRlc2MsIGRpc3RyaWN0LCBjaXR5bXVuRGVzYyksDQogICAgICAgICBzZWNvbmQgPSBpZmVsc2UoZGlzdHJpY3QgPiBjaXR5bXVuRGVzYywgZGlzdHJpY3QsIGNpdHltdW5EZXNjKSkNCm1lcmdlZCA8LSBtZXJnZWQgJT4lIGRpc3RpbmN0KGNpdHlwcm92LCBmaXJzdCwgc2Vjb25kLCAua2VlcF9hbGwgPSBUUlVFKQ0KDQojIERyb3AgdGhlIHRlbXAgY29sdW1ucw0KbWVyZ2VkIDwtIG1lcmdlZCAlPiUgc2VsZWN0KC1maXJzdCwgLXNlY29uZCkNCndyaXRlX2NzdihtZXJnZWQsIGZpbGUucGF0aChkYXRhX3BhdGgsICJjYW5kaWRhdGVfbXVuaV9tYXRjaGVzLmNzdiIpKQ0KDQpgYGANCg0KDQoNCg0KDQo=