# Web scraping the Open Government Data (OGD) Platform India: https://data.gov.in
library(XML)
library(RCurl)
## Loading required package: bitops
# Address of the central publication metrics report
url <- "https://data.gov.in/metrics/central"
# Download the page; url1 holds the content returned for that address
url1 <- getURL(url)
# Parse the HTML table(s) in the downloaded page, keeping text columns as
# character instead of converting them to factors
mytabl2 <- readHTMLTable(url1, stringsAsFactors = FALSE)
# Collapse the parsed result into a single data frame. Every column name
# picks up a "Central.Publication.Metrics." prefix, as the str() output
# below shows.
# NOTE(review): presumably this relies on the page containing exactly one
# table -- confirm, or select the wanted table explicitly.
mytabl3 <- as.data.frame(mytabl2)
# Inspect the structure: column names and types
str(mytabl3)
## 'data.frame': 92 obs. of 5 variables:
## $ Central.Publication.Metrics.Ministry.Department: chr "Ministry of Home Affairs" "Department of Home" "Registrar General and Census Commissioner, India" "Department of States" ...
## $ Central.Publication.Metrics.Resource..Dataset. : chr "4198" "3947" "3937" "234" ...
## $ Central.Publication.Metrics.Resource..App. : chr "0" "0" "0" "0" ...
## $ Central.Publication.Metrics.Total.Resources : chr "4198" "3947" "3937" "234" ...
## $ Central.Publication.Metrics.Total.Catalogs : chr "246" "111" "108" "118" ...
# Preview the first six rows of the freshly converted table
head(mytabl3, n = 6L)
## Central.Publication.Metrics.Ministry.Department
## 1 Ministry of Home Affairs
## 2 Department of Home
## 3 Registrar General and Census Commissioner, India
## 4 Department of States
## 5 National Crime Records Bureau (NCRB)
## 6 Ministry of Agriculture
## Central.Publication.Metrics.Resource..Dataset.
## 1 4198
## 2 3947
## 3 3937
## 4 234
## 5 234
## 6 3111
## Central.Publication.Metrics.Resource..App.
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## Central.Publication.Metrics.Total.Resources
## 1 4198
## 2 3947
## 3 3937
## 4 234
## 5 234
## 6 3111
## Central.Publication.Metrics.Total.Catalogs
## 1 246
## 2 111
## 3 108
## 4 118
## 5 118
## 6 425
# Replace the long prefixed headers with short, readable labels.
# Several of these labels contain spaces or parentheses, so they are not
# syntactic R names: access them with backticks or [[ ]].
names(mytabl3)[seq_len(5)] <- c(
  "Ministry",
  "Resource(Dataset)",
  "Resource(App)",
  "Total Resources",
  "Total Catalogs"
)
# Confirm the new column names
str(mytabl3)
## 'data.frame': 92 obs. of 5 variables:
## $ Ministry : chr "Ministry of Home Affairs" "Department of Home" "Registrar General and Census Commissioner, India" "Department of States" ...
## $ Resource(Dataset): chr "4198" "3947" "3937" "234" ...
## $ Resource(App) : chr "0" "0" "0" "0" ...
## $ Total Resources : chr "4198" "3947" "3937" "234" ...
## $ Total Catalogs : chr "246" "111" "108" "118" ...
# Preview the first six rows under the new headers
head(mytabl3, n = 6L)
## Ministry Resource(Dataset)
## 1 Ministry of Home Affairs 4198
## 2 Department of Home 3947
## 3 Registrar General and Census Commissioner, India 3937
## 4 Department of States 234
## 5 National Crime Records Bureau (NCRB) 234
## 6 Ministry of Agriculture 3111
## Resource(App) Total Resources Total Catalogs
## 1 0 4198 246
## 2 0 3947 111
## 3 0 3937 108
## 4 0 234 118
## 5 0 234 118
## 6 0 3111 425
# Build the final table by dropping the 3rd column, Resource(App);
# drop = FALSE guarantees the result stays a data frame
mytabl_final <- mytabl3[, -3, drop = FALSE]
# Preview the first six rows of the final table
head(mytabl_final, n = 6L)
## Ministry Resource(Dataset)
## 1 Ministry of Home Affairs 4198
## 2 Department of Home 3947
## 3 Registrar General and Census Commissioner, India 3937
## 4 Department of States 234
## 5 National Crime Records Bureau (NCRB) 234
## 6 Ministry of Agriculture 3111
## Total Resources Total Catalogs
## 1 4198 246
## 2 3947 111
## 3 3937 108
## 4 234 118
## 5 234 118
## 6 3111 425