• Motivations

Motivations

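Theo Statista (https://www.statista.com/statistics/1006301/vietnam-market-share-terrestrial-mobile-cellular-service-providers/) thì thị phần của Viettel là 52.52%. Con số này sai khác đáng kể so với thị phần của Viettel (68%) tính được từ bộ dữ liệu VHLSS 2020.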

Dưới đây là R codes:

# NOTE: the workspace is intentionally not cleared here.
# `rm(list = ls())` deletes every object of whoever runs this script, yet it
# leaves attached packages and options untouched, so it does not provide a
# clean state. Every object used below is assigned in this script before it
# is read; run the analysis in a fresh R session (restart R or knit the
# document) to get a reproducible starting point.

# Load some R packages: 

library(haven)
library(stringi)
library(stringr)
library(dplyr)

# Read the HO1 household file of VHLSS 2020 from its Stata (.dta) format.
# The data archive can be downloaded from
# https://www.mediafire.com/file/b6hm4zngz2q5tc6/VHLSS_2020.zip/file

ho1 <- read_dta(file = "E:/VHLSS 2020/VHLSS2020_Household_Data/HO1.dta")


#--------------------------------------
#  Extract mobile-phone provider info
#--------------------------------------

library(rvest)

# Download and parse the web page holding the provider prefix tables; the
# parsed document is kept in `html_content` for the table extraction below.
html_content <- read_html("https://www.thegioididong.com/hoi-dap/danh-sach-cac-dau-so-moi-cua-cac-nha-mang-vitettel-1263877")

#' Extract the prefix table of one mobile provider
#'
#' Selects the `j`-th `<table>` directly under the element whose id is
#' "bannerbody", parses it, and returns its data rows tagged with the
#' provider name read from the first cell of the table.
#'
#' @param j Position (1-based) of the table under the "bannerbody" element.
#' @param html Parsed HTML document to search. Defaults to the `html_content`
#'   object created earlier in this script, so existing calls of the form
#'   `extract_tableData(j)` keep working.
#' @return A data frame with the columns `so_cu` and `so_moi` (the first and
#'   second table columns; presumably the old and new number prefixes) and
#'   `nha_mang` (the provider name).
extract_tableData <- function(j, html = html_content) {

  tables <- html %>%
    html_nodes(xpath = str_c('//*[@id="bannerbody"]/table[', j, ']')) %>%
    html_table()

  # Fail with a readable message instead of "subscript out of bounds" when
  # the page no longer contains the requested table.
  if (length(tables) == 0) {
    stop(
      "Table ", j, " was not found under the 'bannerbody' element; ",
      "the page layout may have changed.",
      call. = FALSE
    )
  }

  df_final <- tables[[1]]

  # The first row is the header row; its first cell carries the provider
  # name wrapped in "Dau so ... cu" once the diacritics are stripped.
  provider_name <- df_final$X1[1] %>%
    stri_trans_general("Latin-ASCII") %>%
    str_replace_all("Dau so | cu", "")

  # Drop the header row, name the two columns and tag every row with the
  # provider.
  df_final %>%
    slice(-1) %>%
    rename(so_cu = X1, so_moi = X2) %>%
    mutate(nha_mang = provider_name)

}


# Scrape the six prefix tables (one per call) and stack them into one table.
df_list <- lapply(1:6, extract_tableData)

mobi_info <- bind_rows(df_list)

# Turn every value of `so_moi` into an anchored regex ("^" + prefix) and join
# all prefixes of the same provider with "|", so that each provider ends up
# with a single alternation pattern repeated on all of its rows.
# The result is ungrouped so that later verbs do not silently run per group.
mobi_info <- mobi_info %>%
  mutate(pattern = str_c("^", so_moi)) %>%
  group_by(nha_mang) %>%
  mutate(pattern = str_flatten(pattern, collapse = "|")) %>%
  ungroup()

#' Look up the prefix regex of one provider
#'
#' @param info Data frame with the columns `nha_mang` (provider name) and
#'   `pattern` (regex), such as `mobi_info`.
#' @param target Provider name to look up, matched exactly against `nha_mang`.
#' @return A character vector of length one holding the provider's pattern.
get_provider_pattern <- function(info, target) {
  found <- info %>%
    filter(nha_mang == target) %>%
    pull(pattern) %>%
    unique()

  # A misspelled or renamed provider would otherwise yield character(0) and
  # only surface later as an obscure error in the classification step.
  if (length(found) != 1) {
    stop(
      "Expected exactly one prefix pattern for provider '", target,
      "' but found ", length(found), ".",
      call. = FALSE
    )
  }

  found
}

# One regex per provider, consumed by the classification of `ho1` below.
viettel_pre <- get_provider_pattern(mobi_info, "Viettel")
vina_pre <- get_provider_pattern(mobi_info, "VinaPhone")
mobi_pre <- get_provider_pattern(mobi_info, "MobiFone")
vietnam_pre <- get_provider_pattern(mobi_info, "Vietnamobile")
Gmobi_pre <- get_provider_pattern(mobi_info, "Gmobile")
itel_pre <- get_provider_pattern(mobi_info, "Itelecom")

# Assign a provider to every row of `ho1` by testing the `mobi` column
# (presumably the household's mobile number - confirm against the VHLSS
# codebook) against each provider's prefix regex.
# `case_when()` takes the first matching condition; rows that match no
# pattern fall through to "Unknown".
# The label for `vietnam_pre` is spelled "Vietnamobile" so that it agrees
# with the provider name used to build the pattern (it was previously
# misspelled "Vietnammobile").
ho1 <- ho1 %>%
  mutate(nha_mang = case_when(str_detect(mobi, viettel_pre) ~ "Viettel",
                              str_detect(mobi, vina_pre) ~ "VinaPhone",
                              str_detect(mobi, mobi_pre) ~ "MobiFone",
                              str_detect(mobi, vietnam_pre) ~ "Vietnamobile",
                              str_detect(mobi, Gmobi_pre) ~ "Gmobile",
                              str_detect(mobi, itel_pre) ~ "Itelecom",
                              TRUE ~ "Unknown"))

library(ggplot2)

# Bar chart of the number of rows per provider (rows classified as "Unknown"
# are excluded), with each bar labelled by the provider's share in percent.
ho1 %>%
  filter(nha_mang != "Unknown") %>%
  count(nha_mang, sort = TRUE) %>%
  mutate(share = 100 * n / sum(n)) %>%
  mutate(label = round(share, 2)) %>%
  # Fix the factor levels in the sorted order so the bars keep that order.
  mutate(nha_mang = factor(nha_mang, levels = nha_mang)) %>%
  ggplot(aes(y = nha_mang, x = n)) +
  geom_col() +
  geom_text(aes(label = label), hjust = -.1) +
  # The upper limit is derived from the data (NA) instead of a fixed 30000,
  # which would silently drop any bar exceeding it; the extra expansion on
  # the right leaves room for the text labels.
  scale_x_continuous(limits = c(0, NA),
                     expand = expansion(mult = c(0.05, 0.15))) +
  theme(axis.title = element_blank()) +
  labs(title = "Market share of mobile service providers in Vietnam")

LS0tDQp0aXRsZTogJ01hcmtldCBzaGFyZSBvZiBtb2JpbGUgc2VydmljZSBwcm92aWRlcnMgaW4gVmlldG5hbSBpbiAyMDIwJw0KYXV0aG9yOiAnQXV0aG9yOiBOZ3V5ZW4gQ2hpIER1bmcnDQpzdWJ0aXRsZTogIlIgZm9yIEZ1biINCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDogDQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KICAgICMgY29kZV9mb2xkaW5nOiBoaWRlDQogICAgaGlnaGxpZ2h0OiB6ZW5idXJuDQogICAgIyBudW1iZXJfc2VjdGlvbnM6IHllcw0KICAgIHRoZW1lOiAiZmxhdGx5Ig0KICAgIHRvYzogVFJVRQ0KICAgIHRvY19mbG9hdDogVFJVRQ0KLS0tDQoNCmBgYHtyIHNldHVwLGluY2x1ZGU9RkFMU0V9DQprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUsIHdhcm5pbmcgPSBGQUxTRSwgbWVzc2FnZSA9IEZBTFNFLCBjYWNoZSA9IFRSVUUsIGZpZy5zaG93dGV4dCA9IFRSVUUpDQoNCmBgYA0KDQojIE1vdGl2YXRpb25zDQoNClRoZW8gU3RhdGlzdGEgdGjDrCBbdGjhu4sgcGjhuqduIGPhu6dhIFZpZXR0ZWwgbMOgIDUyLjUyJV0oaHR0cHM6Ly93d3cuc3RhdGlzdGEuY29tL3N0YXRpc3RpY3MvMTAwNjMwMS92aWV0bmFtLW1hcmtldC1zaGFyZS10ZXJyZXN0cmlhbC1tb2JpbGUtY2VsbHVsYXItc2VydmljZS1wcm92aWRlcnMvKS4gQ29uIHPhu5EgbsOgeSBzYWkga2jDoWMgxJHDoW5nIGvhu4Mgc28gduG7m2kgdGjhu4sgcGjhuqduIGPhu6dhIFZpZXR0ZWwgKDY4JSkgdMOtbmggdOG7qyBi4buZIGThu68gbGnhu4d1IFZITFNTIDIwMjAuIA0KDQpExrDhu5tpIMSRw6J5IGzDoCBSIGNvZGVzOiANCg0KDQpgYGB7cn0NCg0KIyBDbGVhciBSIGVudmlyb25tZW50OiANCnJtKGxpc3QgPSBscygpKQ0KDQojIExvYWQgc29tZSBSIHBhY2thZ2VzOiANCg0KbGlicmFyeShoYXZlbikNCmxpYnJhcnkoc3RyaW5naSkNCmxpYnJhcnkoc3RyaW5ncikNCmxpYnJhcnkoZHBseXIpDQoNCiMgTG9hZCBkYXRhIChkb3dubG9hZCBmcm9tIGh0dHBzOi8vd3d3Lm1lZGlhZmlyZS5jb20vZmlsZS9iNmhtNHpuZ3oycTV0YzYvVkhMU1NfMjAyMC56aXAvZmlsZSk6IA0KDQpyZWFkX2R0YSgiRTovVkhMU1MgMjAyMC9WSExTUzIwMjBfSG91c2Vob2xkX0RhdGEvSE8xLmR0YSIpIC0+IGhvMQ0KDQoNCiMtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQ0KIyAgRXh0cmFjdCBtb2JpbGUtcGhvbmUgcHJvdmlkZXIgaW5mbw0KIy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tDQoNCmxpYnJhcnkocnZlc3QpDQoNCiJodHRwczovL3d3dy50aGVnaW9pZGlkb25nLmNvbS9ob2ktZGFwL2Rhbmgtc2FjaC1jYWMtZGF1LXNvLW1vaS1jdWEtY2FjLW5oYS1tYW5nLXZpdGV0dGVsLTEyNjM4NzciICU+JSByZWFkX2h0bWwoKSAtPiBodG1sX2NvbnRlbnQNCg0KZXh0cmFjdF90YWJsZURhdGEgPC0gZnVuY3Rpb24oaikgew0KICANCiAgaHRtbF9jb250ZW50ICU+JSANCiAgICBodG1sX25vZGVzKHhwYXRoID0gc3Ry
X2MoJy8vKltAaWQ9ImJhbm5lcmJvZHkiXS90YWJsZVsnLCBqLCAnXScpKSAlPiUgDQogICAgaHRtbF90YWJsZSgpICU+JSANCiAgICAuW1sxXV0gLT4gZGZfZmluYWwNCiAgDQogIGRmX2ZpbmFsICU+JSANCiAgICBzbGljZSgtMSkgJT4lIA0KICAgIHJlbmFtZShzb19jdSA9IFgxLCBzb19tb2kgPSBYMikgLT4gZGZfbWFpbg0KICANCiAgZGZfZmluYWwkWDFbMV0gJT4lIA0KICAgIHN0cmlfdHJhbnNfZ2VuZXJhbCgiTGF0aW4tQVNDSUkiKSAlPiUgDQogICAgc3RyX3JlcGxhY2VfYWxsKCJEYXUgc28gfCBjdSIsICIiKSAtPiBwcm92aWRlcl9uYW1lDQogIA0KICBkZl9tYWluICU+JSANCiAgICBtdXRhdGUobmhhX21hbmcgPSBwcm92aWRlcl9uYW1lKSAlPiUgDQogICAgcmV0dXJuKCkNCiAgDQp9DQoNCg0KbGFwcGx5KDE6NiwgZXh0cmFjdF90YWJsZURhdGEpIC0+IGRmX2xpc3QNCg0KZG8uY2FsbCgiYmluZF9yb3dzIiwgZGZfbGlzdCkgLT4gbW9iaV9pbmZvDQoNCm1vYmlfaW5mbyAlPiUgDQogIG11dGF0ZShwYXR0ZXJuID0gc3RyX2MoIl4iLCBzb19tb2kpKSAlPiUgDQogIGdyb3VwX2J5KG5oYV9tYW5nKSAlPiUgDQogIG11dGF0ZShwYXR0ZXJuID0gc3RyX2ZsYXR0ZW4ocGF0dGVybiwgY29sbGFwc2UgPSAifCIpKSAtPiBtb2JpX2luZm8NCg0KbW9iaV9pbmZvICU+JSANCiAgZmlsdGVyKG5oYV9tYW5nID09ICJWaWV0dGVsIikgJT4lIA0KICBwdWxsKHBhdHRlcm4pICU+JSANCiAgdW5pcXVlKCkgLT4gdmlldHRlbF9wcmUNCg0KbW9iaV9pbmZvICU+JSANCiAgZmlsdGVyKG5oYV9tYW5nID09ICJWaW5hUGhvbmUiKSAlPiUgDQogIHB1bGwocGF0dGVybikgJT4lIA0KICB1bmlxdWUoKSAtPiB2aW5hX3ByZQ0KDQptb2JpX2luZm8gJT4lIA0KICBmaWx0ZXIobmhhX21hbmcgPT0gIk1vYmlGb25lIikgJT4lIA0KICBwdWxsKHBhdHRlcm4pICU+JSANCiAgdW5pcXVlKCkgLT4gbW9iaV9wcmUNCg0KbW9iaV9pbmZvICU+JSANCiAgZmlsdGVyKG5oYV9tYW5nID09ICJWaWV0bmFtb2JpbGUiKSAlPiUgDQogIHB1bGwocGF0dGVybikgJT4lIA0KICB1bmlxdWUoKSAtPiB2aWV0bmFtX3ByZQ0KDQptb2JpX2luZm8gJT4lIA0KICBmaWx0ZXIobmhhX21hbmcgPT0gIkdtb2JpbGUiKSAlPiUgDQogIHB1bGwocGF0dGVybikgJT4lIA0KICB1bmlxdWUoKSAtPiBHbW9iaV9wcmUNCg0KbW9iaV9pbmZvICU+JSANCiAgZmlsdGVyKG5oYV9tYW5nID09ICJJdGVsZWNvbSIpICU+JSANCiAgcHVsbChwYXR0ZXJuKSAlPiUgDQogIHVuaXF1ZSgpIC0+IGl0ZWxfcHJlDQoNCmhvMSAlPiUgDQogIG11dGF0ZShuaGFfbWFuZyA9IGNhc2Vfd2hlbihzdHJfZGV0ZWN0KG1vYmksIHZpZXR0ZWxfcHJlKSB+ICJWaWV0dGVsIiwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzdHJfZGV0ZWN0KG1vYmksIHZpbmFfcHJlKSB+ICJWaW5hUGhvbmUiLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHN0cl9kZXRlY3QobW9iaSwgbW9iaV9wcmUpIH4g
Ik1vYmlGb25lIiwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzdHJfZGV0ZWN0KG1vYmksIHZpZXRuYW1fcHJlKSB+ICJWaWV0bmFtbW9iaWxlIiwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzdHJfZGV0ZWN0KG1vYmksIEdtb2JpX3ByZSkgfiAiR21vYmlsZSIsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc3RyX2RldGVjdChtb2JpLCBpdGVsX3ByZSkgfiAiSXRlbGVjb20iLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIFRSVUUgfiAiVW5rbm93biIpKSAtPiBobzENCg0KbGlicmFyeShnZ3Bsb3QyKQ0KDQpobzEgJT4lIA0KICBmaWx0ZXIobmhhX21hbmcgIT0gIlVua25vd24iKSAlPiUgDQogIGdyb3VwX2J5KG5oYV9tYW5nKSAlPiUgDQogIGNvdW50KHNvcnQgPSBUUlVFKSAlPiUgDQogIHVuZ3JvdXAoKSAlPiUgDQogIG11dGF0ZShzaGFyZSA9IDEwMCpuIC8gc3VtKG4pKSAlPiUgDQogIG11dGF0ZShsYWJlbCA9IHJvdW5kKHNoYXJlLCAyKSkgJT4lIA0KICBtdXRhdGUobmhhX21hbmcgPSBmYWN0b3IobmhhX21hbmcsIG5oYV9tYW5nKSkgJT4lIA0KICBnZ3Bsb3QoYWVzKHkgPSBuaGFfbWFuZywgeCA9IG4pKSArIA0KICBnZW9tX2NvbCgpICsgDQogIGdlb21fdGV4dChhZXMobGFiZWwgPSBsYWJlbCksIGhqdXN0ID0gLS4xKSArIA0KICBzY2FsZV94X2NvbnRpbnVvdXMobGltaXRzID0gYygwLCAzMDAwMCkpICsgDQogIHRoZW1lKGF4aXMudGl0bGUgPSBlbGVtZW50X2JsYW5rKCkpICsgDQogIGxhYnModGl0bGUgPSAiTWFya2V0IHNoYXJlIG9mIG1vYmlsZSBzZXJ2aWNlIHByb3ZpZGVycyBpbiBWaWV0bmFtIikNCiAgDQpgYGANCg0K