# Web scraping from the Open Government Data Platform of India: https://data.gov.in
library(XML)
library(RCurl)
## Loading required package: bitops
#Url to fetch the metrics report
  url="https://data.gov.in/metrics/central"
  url1=getURL(url)
#Read HTML Table
  mytabl2=readHTMLTable(url1,stringsAsFactors=FALSE)
# Convert to data frame
  mytabl3=as.data.frame(mytabl2)
  str(mytabl3)
## 'data.frame':    92 obs. of  5 variables:
##  $ Central.Publication.Metrics.Ministry.Department: chr  "Ministry of Home Affairs" "Department of Home" "Registrar General and Census Commissioner, India" "Department of States" ...
##  $ Central.Publication.Metrics.Resource..Dataset. : chr  "4198" "3947" "3937" "234" ...
##  $ Central.Publication.Metrics.Resource..App.     : chr  "0" "0" "0" "0" ...
##  $ Central.Publication.Metrics.Total.Resources    : chr  "4198" "3947" "3937" "234" ...
##  $ Central.Publication.Metrics.Total.Catalogs     : chr  "246" "111" "108" "118" ...
  # Preview the first six rows of the freshly parsed table
  head(mytabl3, n = 6L)
##    Central.Publication.Metrics.Ministry.Department
## 1                         Ministry of Home Affairs
## 2                               Department of Home
## 3 Registrar General and Census Commissioner, India
## 4                             Department of States
## 5             National Crime Records Bureau (NCRB)
## 6                          Ministry of Agriculture
##   Central.Publication.Metrics.Resource..Dataset.
## 1                                           4198
## 2                                           3947
## 3                                           3937
## 4                                            234
## 5                                            234
## 6                                           3111
##   Central.Publication.Metrics.Resource..App.
## 1                                          0
## 2                                          0
## 3                                          0
## 4                                          0
## 5                                          0
## 6                                          0
##   Central.Publication.Metrics.Total.Resources
## 1                                        4198
## 2                                        3947
## 3                                        3937
## 4                                         234
## 5                                         234
## 6                                        3111
##   Central.Publication.Metrics.Total.Catalogs
## 1                                        246
## 2                                        111
## 3                                        108
## 4                                        118
## 5                                        118
## 6                                        425
# Replace the long, prefixed headers of the first five columns with short,
# readable labels. These labels contain spaces and parentheses, so they are
# not syntactic R names: refer to them with backticks or [["..."]].
  names(mytabl3)[seq_len(5)] <- c(
    "Ministry",
    "Resource(Dataset)",
    "Resource(App)",
    "Total Resources",
    "Total Catalogs"
  )
  str(mytabl3)
## 'data.frame':    92 obs. of  5 variables:
##  $ Ministry         : chr  "Ministry of Home Affairs" "Department of Home" "Registrar General and Census Commissioner, India" "Department of States" ...
##  $ Resource(Dataset): chr  "4198" "3947" "3937" "234" ...
##  $ Resource(App)    : chr  "0" "0" "0" "0" ...
##  $ Total Resources  : chr  "4198" "3947" "3937" "234" ...
##  $ Total Catalogs   : chr  "246" "111" "108" "118" ...
  # Preview the first six rows with the new column labels
  head(mytabl3, n = 6L)
##                                           Ministry Resource(Dataset)
## 1                         Ministry of Home Affairs              4198
## 2                               Department of Home              3947
## 3 Registrar General and Census Commissioner, India              3937
## 4                             Department of States               234
## 5             National Crime Records Bureau (NCRB)               234
## 6                          Ministry of Agriculture              3111
##   Resource(App) Total Resources Total Catalogs
## 1             0            4198            246
## 2             0            3947            111
## 3             0            3937            108
## 4             0             234            118
## 5             0             234            118
## 6             0            3111            425
# Keep every column except the third one, Resource(App), and store the
# result as the final table; then preview its first six rows.
  mytabl_final <- mytabl3[-3L]
  head(mytabl_final, n = 6L)
##                                           Ministry Resource(Dataset)
## 1                         Ministry of Home Affairs              4198
## 2                               Department of Home              3947
## 3 Registrar General and Census Commissioner, India              3937
## 4                             Department of States               234
## 5             National Crime Records Bureau (NCRB)               234
## 6                          Ministry of Agriculture              3111
##   Total Resources Total Catalogs
## 1            4198            246
## 2            3947            111
## 3            3937            108
## 4             234            118
## 5             234            118
## 6            3111            425