Realizamos o download e o armazenamento dos dados do Bike Sharing Dataset da UCI: https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset
1. Definimos o diretório padrão dos dados
2. Realizamos o download da base de dados
3. Verificamos se a pasta de dados existe; caso não exista, a criamos
4. Extraimos os dados zipados para a pasta de dados
# Prepare the local data folder and extract the Bike Sharing archive into it.
# The working directory is never changed: the archive is read from, and the
# data folder is created in, the directory the script is run from. The
# previous version called setwd() into the folder when it already existed,
# which made unzip() look for the archive inside it and extract to data/data.
dataDir <- "data"
zipFile <- "Bike-Sharing-Dataset.zip"
# One-off download of the archive (kept disabled; uncomment to fetch it).
# mode = "wb" keeps the zip intact on platforms that distinguish text/binary.
#download.file("https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip", zipFile, mode = "wb")
if (!dir.exists(dataDir)) {
  dir.create(dataDir)
}
if (file.exists(zipFile)) {
  unzip(zipFile, exdir = dataDir, unzip = "internal")
} else {
  # Stay best-effort like the original, but say exactly what is missing.
  warning("Archive not found: ", zipFile, "; nothing was extracted",
          call. = FALSE)
}
## Warning in unzip("Bike-Sharing-Dataset.zip", exdir = "data", unzip =
## "internal"): error 1 in extracting from zip file
# Show what is currently inside the data folder.
list.files(path = "data/")
## character(0)
Tempo:
1: Ceu limpo, Algumas nuvens, Parcialmente nublado, Parcialmente nublado -
2: Nevoa + Nublado, Nevoa + Nuvens quebradas, Nevoa + Algumas nuvens, Nevoa -
3: Neve leve, Chuva leve + Trovoada + Nuvens dispersas, Chuva leve + Nuvens dispersas -
4: Chuva forte + Granizo + Trovoada + Nevoa, Neve + Nevoeiro
# Load the daily records from the extracted CSV and preview the first 25 rows.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change how the header line is handled.
day <- read.csv("data/day.csv", header = TRUE, sep = ",")
head(day, 25)
## instant dteday season yr mnth holiday weekday workingday weathersit
## 1 1 2011-01-01 1 0 1 0 6 0 2
## 2 2 2011-01-02 1 0 1 0 0 0 2
## 3 3 2011-01-03 1 0 1 0 1 1 1
## 4 4 2011-01-04 1 0 1 0 2 1 1
## 5 5 2011-01-05 1 0 1 0 3 1 1
## 6 6 2011-01-06 1 0 1 0 4 1 1
## 7 7 2011-01-07 1 0 1 0 5 1 2
## 8 8 2011-01-08 1 0 1 0 6 0 2
## 9 9 2011-01-09 1 0 1 0 0 0 1
## 10 10 2011-01-10 1 0 1 0 1 1 1
## 11 11 2011-01-11 1 0 1 0 2 1 2
## 12 12 2011-01-12 1 0 1 0 3 1 1
## 13 13 2011-01-13 1 0 1 0 4 1 1
## 14 14 2011-01-14 1 0 1 0 5 1 1
## 15 15 2011-01-15 1 0 1 0 6 0 2
## 16 16 2011-01-16 1 0 1 0 0 0 1
## 17 17 2011-01-17 1 0 1 1 1 0 2
## 18 18 2011-01-18 1 0 1 0 2 1 2
## 19 19 2011-01-19 1 0 1 0 3 1 2
## 20 20 2011-01-20 1 0 1 0 4 1 2
## 21 21 2011-01-21 1 0 1 0 5 1 1
## 22 22 2011-01-22 1 0 1 0 6 0 1
## 23 23 2011-01-23 1 0 1 0 0 0 1
## 24 24 2011-01-24 1 0 1 0 1 1 1
## 25 25 2011-01-25 1 0 1 0 2 1 2
## temp atemp hum windspeed casual registered cnt
## 1 0.3441670 0.3636250 0.805833 0.1604460 331 654 985
## 2 0.3634780 0.3537390 0.696087 0.2485390 131 670 801
## 3 0.1963640 0.1894050 0.437273 0.2483090 120 1229 1349
## 4 0.2000000 0.2121220 0.590435 0.1602960 108 1454 1562
## 5 0.2269570 0.2292700 0.436957 0.1869000 82 1518 1600
## 6 0.2043480 0.2332090 0.518261 0.0895652 88 1518 1606
## 7 0.1965220 0.2088390 0.498696 0.1687260 148 1362 1510
## 8 0.1650000 0.1622540 0.535833 0.2668040 68 891 959
## 9 0.1383330 0.1161750 0.434167 0.3619500 54 768 822
## 10 0.1508330 0.1508880 0.482917 0.2232670 41 1280 1321
## 11 0.1690910 0.1914640 0.686364 0.1221320 43 1220 1263
## 12 0.1727270 0.1604730 0.599545 0.3046270 25 1137 1162
## 13 0.1650000 0.1508830 0.470417 0.3010000 38 1368 1406
## 14 0.1608700 0.1884130 0.537826 0.1265480 54 1367 1421
## 15 0.2333330 0.2481120 0.498750 0.1579630 222 1026 1248
## 16 0.2316670 0.2342170 0.483750 0.1884330 251 953 1204
## 17 0.1758330 0.1767710 0.537500 0.1940170 117 883 1000
## 18 0.2166670 0.2323330 0.861667 0.1467750 9 674 683
## 19 0.2921740 0.2984220 0.741739 0.2083170 78 1572 1650
## 20 0.2616670 0.2550500 0.538333 0.1959040 83 1844 1927
## 21 0.1775000 0.1578330 0.457083 0.3532420 75 1468 1543
## 22 0.0591304 0.0790696 0.400000 0.1719700 93 888 981
## 23 0.0965217 0.0988391 0.436522 0.2466000 150 836 986
## 24 0.0973913 0.1179300 0.491739 0.1583300 86 1330 1416
## 25 0.2234780 0.2345260 0.616957 0.1297960 186 1799 1985
# List the column names as they come from the CSV file.
colnames(day)
## [1] "instant" "dteday" "season" "yr" "mnth"
## [6] "holiday" "weekday" "workingday" "weathersit" "temp"
## [11] "atemp" "hum" "windspeed" "casual" "registered"
## [16] "cnt"
# Replace the original English column names with Portuguese ones.
# The assignment is positional, so the order below must follow the CSV columns.
colnames(day) <- c(
  "dia", "data", "estacao", "ano",
  "mes", "feriado", "dia.semana", "dia.util",
  "tempo", "temp", "sensacao", "hum",
  "vento", "casual", "registrado", "total"
)
# Print the names again to confirm the rename.
colnames(day)
## [1] "dia" "data" "estacao" "ano" "mes"
## [6] "feriado" "dia.semana" "dia.util" "tempo" "temp"
## [11] "sensacao" "hum" "vento" "casual" "registrado"
## [16] "total"
# Preview the first five rows under the new column names.
head(day, n = 5)
## dia data estacao ano mes feriado dia.semana dia.util tempo
## 1 1 2011-01-01 1 0 1 0 6 0 2
## 2 2 2011-01-02 1 0 1 0 0 0 2
## 3 3 2011-01-03 1 0 1 0 1 1 1
## 4 4 2011-01-04 1 0 1 0 2 1 1
## 5 5 2011-01-05 1 0 1 0 3 1 1
## temp sensacao hum vento casual registrado total
## 1 0.344167 0.363625 0.805833 0.160446 331 654 985
## 2 0.363478 0.353739 0.696087 0.248539 131 670 801
## 3 0.196364 0.189405 0.437273 0.248309 120 1229 1349
## 4 0.200000 0.212122 0.590435 0.160296 108 1454 1562
## 5 0.226957 0.229270 0.436957 0.186900 82 1518 1600
# Lookup table translating the numeric weather code into a readable label.
tipo <- c("bom", "nevoeiro", "Chuva_leve_Trovoada", "chuvaforte")
num <- c(1, 2, 3, 4)
# data.frame() keeps `num` numeric. The previous cbind() built a character
# matrix, so the join key became text and only matched `tempo` through
# implicit coercion.
tempotipo <- data.frame(num = num, tipo = tipo)
# Inner join on the weather code: every row of `day` gains its `tipo` label.
# merge() sorts by the key by default, so the result is no longer in date
# order, and rows whose code is missing from the lookup would be dropped.
final <- merge(day, tempotipo, by.x = "tempo", by.y = "num")
head(final)
## tempo dia data estacao ano mes feriado dia.semana dia.util
## 1 1 151 2011-05-31 2 0 5 0 2 1
## 2 1 50 2011-02-19 1 0 2 0 6 0
## 3 1 157 2011-06-06 2 0 6 0 1 1
## 4 1 110 2011-04-20 2 0 4 0 3 1
## 5 1 4 2011-01-04 1 0 1 0 2 1
## 6 1 136 2011-05-16 2 0 5 0 1 1
## temp sensacao hum vento casual registrado total tipo
## 1 0.775000 0.725383 0.636667 0.111329 673 3309 3982 bom
## 2 0.399167 0.391404 0.187917 0.507463 532 1103 1635 bom
## 3 0.678333 0.621858 0.600000 0.121896 673 3875 4548 bom
## 4 0.595000 0.564392 0.614167 0.241925 613 3331 3944 bom
## 5 0.200000 0.212122 0.590435 0.160296 108 1454 1562 bom
## 6 0.577500 0.550512 0.787917 0.126871 773 3185 3958 bom