#Web Scraping
# Download historical weather records for multiple cities over a custom date range from an HTML page
library(XML);library(RCurl)
## Warning: package 'RCurl' was built under R version 3.1.2
## Loading required package: bitops
# Single-request demo: download the custom-range daily history CSV for
# airport KSFO (13-16 Dec 2015) and preview its first row.
# The airport code in the URL path and in the `ID` query parameter now agree;
# the query previously said KATL while the path said KSFO.
x_file <- paste0(
  "http://www.wunderground.com/history/airport/KSFO/2015/12/13/",
  "CustomHistory.html/cgi-bin/histGraphAll?",
  "day=13&year=2015&month=12&dayend=16&yearend=2015&monthend=12&",
  "ID=KSFO&type=6&width=614&format=1"
)
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
x_url <- getURLContent(x_file, ssl.verifypeer = FALSE)
# `text =` lets read.csv open and close its own connection; a textConnection()
# passed as `file` is never closed and leaks.
x_data <- read.csv(text = x_url, header = TRUE)
head(x_data, 1)
## PST Max.TemperatureF Mean.TemperatureF Min.TemperatureF
## 1 2015-12-13 57 53 48
## Max.Dew.PointF MeanDew.PointF Min.DewpointF Max.Humidity Mean.Humidity
## 1 55 46 36 96 79
## Min.Humidity Max.Sea.Level.PressureIn Mean.Sea.Level.PressureIn
## 1 61 30.16 30
## Min.Sea.Level.PressureIn Max.VisibilityMiles Mean.VisibilityMiles
## 1 29.86 10 8
## Min.VisibilityMiles Max.Wind.SpeedMPH Mean.Wind.SpeedMPH
## 1 1 37 19
## Max.Gust.SpeedMPH PrecipitationIn CloudCover Events WindDirDegrees.br...
## 1 47 0.76 6 Rain 244<br />
#######################################################
# Build the character vector of airport codes to download.
# The 4th column of the CSV is prefixed with "K" to form the code used in the
# URL (presumably it holds the 3-letter airport code -- TODO confirm against
# the file).
AirpotList <- read.csv("~/Documents/S&K/Kunal/Major_Airports.csv", header = TRUE)
# Keep at most the first five codes. head() never pads with NA, unlike
# `[1:5]`, so a file with fewer than five rows no longer yields NA entries.
AirpotList <- head(paste0("K", AirpotList[, 4]), 5)
#AirpotList <- c("KATL", "KSFO", "KMIA")
#######################################################
# Column names applied to each per-airport table, in the order the columns
# are selected inside Kunal_fn.
V <- c(
  "Date",
  "Max.TemperatureF", "Mean.TemperatureF", "Min.TemperatureF",
  "Max.Humidity",
  "Events"
)
# Reset the top-level x_data to NULL before the per-airport downloads.
x_data <- NULL
# Download one airport's daily weather history for 2015-01-01 .. 2015-12-25.
#
# Builds the "CustomHistory" CSV URL for the airport at position `i` of the
# global `AirpotList`, downloads it, keeps the date, max/mean/min temperature,
# max humidity and events columns (renamed to the global `V`), and appends an
# `Airport` column holding the airport code.
#
# Args:
#   i: a single index into the global vector `AirpotList`.
# Returns:
#   A data.frame with the columns named in `V` followed by `Airport`.
# Errors:
#   Stops if `i` is not a single valid index, or if the response has fewer
#   columns than the positions selected below.
Kunal_fn <- function(i) {
  stopifnot(length(i) == 1L, !is.na(i), i >= 1L, i <= length(AirpotList))
  airport <- AirpotList[i]

  # The start date appears both in the URL path and in the query string; the
  # two now describe the same day (the query previously said 13 Dec while the
  # path said 1 Jan).
  URL <- paste0(
    "http://www.wunderground.com/history/airport/", airport,
    "/2015/1/1/",
    "CustomHistory.html/cgi-bin/histGraphAll?",
    "day=1&year=2015&month=1&",
    "dayend=25&yearend=2015&monthend=12&",
    "ID=", airport,
    "&type=6&width=614&format=1"
  )

  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  x_url <- getURLContent(URL, ssl.verifypeer = FALSE)
  # `text =` lets read.csv open and close its own connection; a textConnection()
  # passed as `file` is never closed and leaks.
  x_data <- read.csv(text = x_url, header = TRUE)

  # Positions in the returned CSV: 1 = date, 2:4 = max/mean/min temperature,
  # 8 = max humidity, 22 = events.
  keep <- c(1:4, 8, 22)
  if (ncol(x_data) < max(keep)) {
    stop("Unexpected response for ", airport, ": expected at least ",
         max(keep), " columns, got ", ncol(x_data), call. = FALSE)
  }
  x_data <- x_data[, keep]
  colnames(x_data) <- V

  # rep() keeps the cbind valid when the response has zero data rows.
  cbind(x_data, Airport = rep(airport, nrow(x_data)))
}
#######################################################
# Number of airports that will be downloaded.
length(AirpotList)
## [1] 5
# One data frame per airport. seq_along() yields an empty sequence for an
# empty AirpotList, whereas 1:length(x) would iterate over c(1, 0).
TempData <- lapply(seq_along(AirpotList), Kunal_fn)
head(TempData[[1]]) # Atlanta
## Date Max.TemperatureF Mean.TemperatureF Min.TemperatureF
## 1 2015-1-1 58 45 32
## 2 2015-1-2 49 46 43
## 3 2015-1-3 62 55 48
## 4 2015-1-4 66 55 44
## 5 2015-1-5 48 41 34
## 6 2015-1-6 54 42 30
## Max.Humidity Events Airport
## 1 78 Rain KATL
## 2 93 Rain KATL
## 3 100 Fog-Rain KATL
## 4 97 Fog-Rain-Thunderstorm KATL
## 5 76 KATL
## 6 72 KATL
# Preview the last rows of the last airport's table. Indexing by
# length(TempData) instead of a literal 5 keeps this valid when the number of
# airports changes.
tail(TempData[[length(TempData)]])
## Date Max.TemperatureF Mean.TemperatureF Min.TemperatureF
## 354 2015-12-20 54 39 23
## 355 2015-12-21 56 48 39
## 356 2015-12-22 63 57 50
## 357 2015-12-23 67 63 59
## 358 2015-12-24 73 68 63
## 359 2015-12-25 74 69 63
## Max.Humidity Events Airport
## 354 81 KCLT
## 355 74 KCLT
## 356 100 Fog-Rain KCLT
## 357 97 Fog-Rain-Thunderstorm KCLT
## 358 93 Rain KCLT
## 359 93 Fog-Rain-Thunderstorm KCLT