Intro
I scraped the course participants page to gain some insight into my course mates.
By the end of this course, I want to transform the time column in order to:
Know the time with the highest traffic for each country, and check whether the course is still as crowded in its last week as it was in the first week.
code
library(rvest)
library(dplyr)
library(ggplot2)
library(cairoDevice)
# Scrape the course participants table ----

# Credentials come from the environment so they are never stored in the
# script. Set JOURNALISM_USER and JOURNALISM_PASSWORD (e.g. in ~/.Renviron).
username <- Sys.getenv("JOURNALISM_USER")
password <- Sys.getenv("JOURNALISM_PASSWORD")
if (!nzchar(username) || !nzchar(password)) {
  stop(
    "Set JOURNALISM_USER and JOURNALISM_PASSWORD before running this script.",
    call. = FALSE
  )
}

url <- "https://journalismcourses.org/"
session <- html_session(url)

# Fill in the login form found on the landing page.
login <- session %>%
  html_node("#login") %>%
  html_form() %>%
  set_values(username = username, password = password)

# Continue from the session returned by the form submission.
session <- submit_form(session, login)
journal <- read_html(session)

# perpage=5000 asks for every participant on a single page.
participants_url <- paste0(
  "https://journalismcourses.org/user/index.php",
  "?contextid=48009&roleid=0&id=9&search&perpage=5000"
)
session <- jump_to(session, participants_url)

population <- session %>%
  html_nodes(xpath = '//*[@id="participants"]') %>%
  html_table()

if (length(population) == 0) {
  stop(
    "No participants table found; the login may have failed.",
    call. = FALSE
  )
}

# Print the time at which this snapshot of the participants list was taken.
Sys.time()

participants <- population[[1]]

# The script keeps the first 2854 rows and columns 2 to 5 of the scraped
# table. Cap the row count at the table size so that a shorter table does not
# produce all-NA rows.
n_rows <- min(2854L, nrow(participants))
participants <- participants[seq_len(n_rows), 2:5]
colnames(participants) <- c("name", "city", "country", "time")

# as.data.frame(table(...)) names its columns Var1/Freq; rename them so later
# code can refer to `country` and `frequency`.
count <- as.data.frame(table(participants$country))
colnames(count) <- c("country", "frequency")

# Visualization
ggplot(participants, aes(x = country)) +
  geom_bar() +
  coord_flip()
Table
| country | frequency |
|---|---|
| United States | 863 |
| Brazil | 339 |
| Spain | 129 |
| United Kingdom | 126 |
| India | 98 |
| Mexico | 86 |
| Canada | 68 |
| Germany | 62 |
| Argentina | 57 |
| Nigeria | 57 |
| Colombia | 46 |
| Saudi Arabia | 45 |
| Australia | 36 |
| Ukraine | 34 |
| Chile | 29 |
| Italy | 29 |
| Venezuela, Bolivarian Republic Of | 28 |
| Korea, Republic Of | 27 |
| Peru | 27 |
| Egypt | 24 |
| Russian Federation | 24 |
| Philippines | 23 |
| Portugal | 23 |
| Hong Kong | 22 |
| Netherlands | 22 |
| France | 21 |
| South Africa | 21 |
| Turkey | 21 |
| Indonesia | 19 |
| Ghana | 18 |
| Greece | 14 |
| Pakistan | 14 |
| Belgium | 13 |
| Kenya | 12 |
| Poland | 12 |
| China | 11 |
| Ecuador | 11 |
| Japan | 11 |
| Sweden | 11 |
| Switzerland | 11 |
| Bolivia, Plurinational State Of | 10 |
| Denmark | 10 |
| Guatemala | 10 |
| Ireland | 10 |
| Finland | 9 |
| Morocco | 9 |
| Singapore | 9 |
| Austria | 8 |
| Jordan | 8 |
| Czechia | 7 |
| Israel | 7 |
| Nepal | 7 |
| United Arab Emirates | 7 |
| Bangladesh | 6 |
| Belarus | 6 |
| Iran, Islamic Republic Of | 6 |
| Lebanon | 6 |
| New Zealand | 6 |
| Taiwan | 6 |
| (blank) | 5 |
| Costa Rica | 5 |
| Dominican Republic | 5 |
| Hungary | 5 |
| Kyrgyzstan | 5 |
| Romania | 5 |
| Uruguay | 5 |
| Cameroon | 4 |
| El Salvador | 4 |
| Honduras | 4 |
| Mozambique | 4 |
| Norway | 4 |
| Palestine, State Of | 4 |
| Paraguay | 4 |
| Qatar | 4 |
| Thailand | 4 |
| Uganda | 4 |
| Viet Nam | 4 |
| Bulgaria | 3 |
| Croatia | 3 |
| Kazakhstan | 3 |
| Nicaragua | 3 |
| Senegal | 3 |
| Somalia | 3 |
| Trinidad And Tobago | 3 |
| Tunisia | 3 |
| Afghanistan | 2 |
| Azerbaijan | 2 |
| Bosnia And Herzegovina | 2 |
| Botswana | 2 |
| Ethiopia | 2 |
| Latvia | 2 |
| Liberia | 2 |
| Malaysia | 2 |
| Myanmar | 2 |
| Oman | 2 |
| Serbia | 2 |
| Slovakia | 2 |
| Slovenia | 2 |
| Swaziland | 2 |
| Syrian Arab Republic | 2 |
| Yemen | 2 |
| Zimbabwe | 2 |
| Algeria | 1 |
| Armenia | 1 |
| Bahrain | 1 |
| Benin | 1 |
| Bermuda | 1 |
| Burundi | 1 |
| Congo, The Democratic Republic Of The | 1 |
| Cuba | 1 |
| Cyprus | 1 |
| Fiji | 1 |
| Georgia | 1 |
| Haiti | 1 |
| Jamaica | 1 |
| Lithuania | 1 |
| Malawi | 1 |
| Micronesia, Federated States Of | 1 |
| Moldova, Republic Of | 1 |
| Mongolia | 1 |
| Namibia | 1 |
| Puerto Rico | 1 |
| Sierra Leone | 1 |
| Sri Lanka | 1 |
| Sudan | 1 |
| Tajikistan | 1 |
| Tanzania, United Republic Of | 1 |
| Zambia | 1 |
plot
# Horizontal bar chart of participants per country, ordered by frequency.
# A data frame built with as.data.frame(table(...)) has columns Var1/Freq by
# default, so name the two columns explicitly before mapping them. Columns are
# referenced by bare name inside aes() rather than through `count$...`.
country_counts <- setNames(count[, 1:2], c("country", "frequency"))
ggplot(
  country_counts,
  aes(x = reorder(country, frequency), y = frequency)
) +
  geom_col() +
  coord_flip() +
  xlab("country") +
  # Repeat the frequency axis on the opposite side of the plot.
  scale_y_continuous(sec.axis = dup_axis())