1. Introdução

Iniciaremos uma série de tutoriais cujo objetivo é executar um processo de análise de um conjunto de dados reais. Vamos começar preparando os dados e, em seguida, usar algumas técnicas de Análise Exploratória de Dados (Exploratory Data Analysis, EDA, em inglês) para termos uma visão geral dos dados.

2. Dados

O conjunto de dados consiste em registros diários de vários parâmetros meteorológicos, medidos na cidade do Porto (Portugal) ao longo do ano de 2014. Temos, então, 365 observações para cada uma das 14 variáveis; as três primeiras são descritas a seguir:

day.count - número sequencial do dia no ano (1 para 1º de janeiro, até 365 para 31 de dezembro)

day - dia do mês

month - mês do ano

3. Preparação dos dados

Os dados serão lidos do arquivo weather_2014.csv, enviado pelo professor Washington via email.

Desenvolvimento do exercício:

# 3.1 Importando os dados:

# Point the session at the folder that holds the tutorial files
# NOTE(review): this path is specific to the author's machine -- adjust it
setwd("~/_MESTRADO_IFMG_2015/ANALISE_DADOS/RMARKDOWN")

# Show the current working directory
getwd()
## [1] "C:/Users/Gustavo/Documents/_MESTRADO_IFMG_2015/ANALISE_DADOS/RMARKDOWN"

# Import the semicolon-separated file with read.csv(), keeping text columns
# as character vectors instead of factors
clima <- read.csv(
  file = "weather_2014.csv",
  sep = ";",
  stringsAsFactors = FALSE
)

# List the objects currently in the workspace
ls()
## [1] "clima"
# Create an object holding the integers 1 to 100.
# seq_len() states the intended length directly; wrapping a vector in seq()
# returns the indices of that vector rather than its values, which is easy
# to misread.
x <- seq_len(100)
x
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
##  [18]  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34
##  [35]  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51
##  [52]  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68
##  [69]  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85
##  [86]  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100
# The newly created object now shows up in the workspace listing
ls()
## [1] "clima" "x"
# Remove x from the workspace and list the objects again to confirm
rm(x)
ls()
## [1] "clima"
# Class of the data structure
class(clima)
## [1] "data.frame"
# Dimensions: number of observations (rows) and number of variables (columns)
dim(clima)
## [1] 365  14
# Show the variable (column) names
names(clima)
##  [1] "day.count"      "day"            "month"          "season"        
##  [5] "l.temp"         "h.temp"         "ave.temp"       "l.temp.time"   
##  [9] "h.temp.time"    "rain"           "ave.wind"       "gust.wind"     
## [13] "gust.wind.time" "dir.wind"
# Show the first six rows of the object (six is also head()'s default)
head(clima, n = 6L)
##   day.count day month season l.temp h.temp ave.temp l.temp.time
## 1         1   1     1 Winter   12.7   14.0     13.4       01:25
## 2         2   2     1 Winter   11.3   14.7     13.5       07:30
## 3         3   3     1 Winter   12.6   14.7     13.6       21:00
## 4         4   4     1 Winter    7.7   13.9     11.3       10:35
## 5         5   5     1 Winter    8.8   14.6     13.0       01:40
## 6         6   6     1 Winter   11.8   14.4     13.1       19:35
##   h.temp.time rain ave.wind gust.wind gust.wind.time dir.wind
## 1       23:50 32.0     11.4      53.1          15:45        S
## 2       11:15 64.8      5.6      41.8          22:25        S
## 3       14:00 12.7      4.3      38.6          00:00      SSW
## 4       01:50 20.1     10.3      66.0          09:05       SW
## 5       12:55  9.4     11.6      51.5          13:50      SSE
## 6       00:05 38.9      9.9      57.9          08:10      SSE
# Show the last six rows of the object (six is also tail()'s default)
tail(clima, n = 6L)
##     day.count day month season l.temp h.temp ave.temp l.temp.time
## 360       360  26    12 Winter    6.6   15.3     10.0       06:57
## 361       361  27    12 Winter    4.4   12.9      8.3       04:24
## 362       362  28    12 Winter    7.4   13.6     11.5       22:41
## 363       363  29    12 Winter    5.7   11.4      8.4       07:59
## 364       364  30    12 Winter    6.3   13.3      9.3       05:05
## 365       365  31    12 Winter    5.1   13.9      8.9       08:11
##     h.temp.time rain ave.wind gust.wind gust.wind.time dir.wind
## 360       13:42  0.0      0.7      16.1          15:15      ENE
## 361       14:38  0.3      0.8      14.5          14:35      NNW
## 362       13:09  4.3      4.1      40.2          11:55       NW
## 363       15:25  0.0      9.8      51.5          11:45      ENE
## 364       15:44  0.0      6.5      38.6          00:40        E
## 365       13:43  0.0      0.2      20.9          00:03       NE
# Per-column summary: quartiles and mean for numeric columns,
# length/class/mode for character columns
summary(clima)
##    day.count        day            month           season         
##  Min.   :  1   Min.   : 1.00   Min.   : 1.000   Length:365        
##  1st Qu.: 92   1st Qu.: 8.00   1st Qu.: 4.000   Class :character  
##  Median :183   Median :16.00   Median : 7.000   Mode  :character  
##  Mean   :183   Mean   :15.72   Mean   : 6.526                     
##  3rd Qu.:274   3rd Qu.:23.00   3rd Qu.:10.000                     
##  Max.   :365   Max.   :31.00   Max.   :12.000                     
##      l.temp          h.temp         ave.temp     l.temp.time       
##  Min.   : 3.10   Min.   : 9.80   Min.   : 7.30   Length:365        
##  1st Qu.: 9.10   1st Qu.:14.40   1st Qu.:12.00   Class :character  
##  Median :12.90   Median :19.10   Median :15.80   Mode  :character  
##  Mean   :12.65   Mean   :19.19   Mean   :15.74                     
##  3rd Qu.:16.30   3rd Qu.:23.30   3rd Qu.:19.30                     
##  Max.   :22.60   Max.   :31.50   Max.   :26.60                     
##  h.temp.time             rain           ave.wind        gust.wind    
##  Length:365         Min.   : 0.000   Min.   : 0.000   Min.   : 3.20  
##  Class :character   1st Qu.: 0.000   1st Qu.: 2.300   1st Qu.:22.50  
##  Mode  :character   Median : 0.300   Median : 3.500   Median :29.00  
##                     Mean   : 5.843   Mean   : 4.044   Mean   :31.15  
##                     3rd Qu.: 5.300   3rd Qu.: 5.200   3rd Qu.:38.60  
##                     Max.   :74.900   Max.   :16.600   Max.   :86.90  
##  gust.wind.time       dir.wind        
##  Length:365         Length:365        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
# 3.2 Verificando dados faltantes:

# 1: Total number of NA values across the whole data set
sum(is.na(clima))
## [1] 0
# 2: Number of rows that are NOT complete cases (rows with at least one NA)
sum(!complete.cases(clima))
## [1] 0
# 3.3 Especificando elementos de uma dataframe:

# First row (all columns)
ind1 <- clima[1, ]

# First column (all rows)
dia <- clima[, 1]

# Single element: row 1, column 2
clima[1, 2]
## [1] 1
# Select columns by name (prints every row of the two columns)
clima[c("season", "dir.wind")]
##     season dir.wind
## 1   Winter        S
## 2   Winter        S
## 3   Winter      SSW
## 4   Winter       SW
## 5   Winter      SSE
## 6   Winter      SSE
## 7   Winter       SE
## 8   Winter       SE
## 9   Winter        N
## 10  Winter      NNW
## 11  Winter       NE
## 12  Winter      SSE
## 13  Winter      WNW
## 14  Winter      WNW
## 15  Winter      SSW
## 16  Winter       SW
## 17  Winter       SE
## 18  Winter      NNW
## 19  Winter       NW
## 20  Winter        N
## 21  Winter       SE
## 22  Winter       NW
## 23  Winter        N
## 24  Winter      NNW
## 25  Winter      NNW
## 26  Winter       NW
## 27  Winter       NW
## 28  Winter      WSW
## 29  Winter       NW
## 30  Winter       NW
## 31  Winter      SSW
## 32  Winter       NW
## 33  Winter       SE
## 34  Winter      SSW
## 35  Winter        S
## 36  Winter      SSW
## 37  Winter      SSW
## 38  Winter      SSW
## 39  Winter        W
## 40  Winter      SSE
## 41  Winter       NW
## 42  Winter       NW
## 43  Winter      SSE
## 44  Winter        S
## 45  Winter        S
## 46  Winter       NW
## 47  Winter       NE
## 48  Winter       SE
## 49  Winter      NNW
## 50  Winter       SE
## 51  Winter      SSW
## 52  Winter       NW
## 53  Winter        S
## 54  Winter       SE
## 55  Winter       SW
## 56  Winter       SW
## 57  Winter       NW
## 58  Winter      SSW
## 59  Winter      WNW
## 60  Winter      WSW
## 61  Winter        W
## 62  Winter      WNW
## 63  Winter      WNW
## 64  Winter       NW
## 65  Winter      NNW
## 66  Winter       NW
## 67  Winter      NNW
## 68  Winter        E
## 69  Winter      NNE
## 70  Winter       NE
## 71  Winter       NE
## 72  Winter       NW
## 73  Winter       NW
## 74  Winter      NNW
## 75  Winter       NE
## 76  Winter       NW
## 77  Winter      NNW
## 78  Winter      ENE
## 79  Winter      WNW
## 80  Spring      SSE
## 81  Spring       NW
## 82  Spring       NW
## 83  Spring       NW
## 84  Spring      WNW
## 85  Spring       NW
## 86  Spring       NE
## 87  Spring      NNE
## 88  Spring       NW
## 89  Spring       SE
## 90  Spring       SE
## 91  Spring       SE
## 92  Spring      SSE
## 93  Spring      WNW
## 94  Spring      SSE
## 95  Spring        S
## 96  Spring        S
## 97  Spring        S
## 98  Spring      NNW
## 99  Spring      ENE
## 100 Spring       NW
## 101 Spring       NW
## 102 Spring       NW
## 103 Spring       NW
## 104 Spring      NNE
## 105 Spring      SSW
## 106 Spring       NW
## 107 Spring       NW
## 108 Spring        W
## 109 Spring       NW
## 110 Spring      NNW
## 111 Spring       NW
## 112 Spring        S
## 113 Spring      WNW
## 114 Spring      WNW
## 115 Spring      SSE
## 116 Spring        W
## 117 Spring       NW
## 118 Spring       NW
## 119 Spring       NW
## 120 Spring       NW
## 121 Spring       NW
## 122 Spring      NNW
## 123 Spring       NE
## 124 Spring      ENE
## 125 Spring       NW
## 126 Spring       NW
## 127 Spring       NW
## 128 Spring       NW
## 129 Spring       NW
## 130 Spring       NW
## 131 Spring       NW
## 132 Spring       NW
## 133 Spring      NNW
## 134 Spring      ENE
## 135 Spring      ENE
## 136 Spring      NNE
## 137 Spring       NE
## 138 Spring       SW
## 139 Spring       SW
## 140 Spring       SE
## 141 Spring      SSE
## 142 Spring       SW
## 143 Spring       NW
## 144 Spring       NW
## 145 Spring       NW
## 146 Spring       NW
## 147 Spring       NW
## 148 Spring      WNW
## 149 Spring       SW
## 150 Spring       NW
## 151 Spring      NNW
## 152 Spring        N
## 153 Spring       NW
## 154 Spring       NW
## 155 Spring       NW
## 156 Spring      SSE
## 157 Spring      SSE
## 158 Spring      SSE
## 159 Spring        S
## 160 Spring      SSW
## 161 Spring       NW
## 162 Spring       NW
## 163 Spring       NW
## 164 Spring       NW
## 165 Spring       NW
## 166 Spring       NE
## 167 Spring       NE
## 168 Spring       NE
## 169 Spring      WNW
## 170 Spring       NW
## 171 Spring       NW
## 172 Summer       SE
## 173 Summer      SSE
## 174 Summer       NW
## 175 Summer      ENE
## 176 Summer       NW
## 177 Summer       NW
## 178 Summer      WNW
## 179 Summer       NW
## 180 Summer       NW
## 181 Summer      WNW
## 182 Summer      WNW
## 183 Summer       NW
## 184 Summer       NW
## 185 Summer      WNW
## 186 Summer        S
## 187 Summer      WNW
## 188 Summer       NW
## 189 Summer       NW
## 190 Summer        N
## 191 Summer       NE
## 192 Summer      NNE
## 193 Summer       NW
## 194 Summer       NW
## 195 Summer       NW
## 196 Summer       NW
## 197 Summer       NW
## 198 Summer      SSW
## 199 Summer      SSE
## 200 Summer      SSW
## 201 Summer      WNW
## 202 Summer       NW
## 203 Summer       NW
## 204 Summer      WNW
## 205 Summer      WNW
## 206 Summer       NW
## 207 Summer      SSW
## 208 Summer       NW
## 209 Summer       NW
## 210 Summer        N
## 211 Summer      ENE
## 212 Summer        S
## 213 Summer       SW
## 214 Summer      SSW
## 215 Summer      WNW
## 216 Summer       NW
## 217 Summer       NW
## 218 Summer       NW
## 219 Summer      WNW
## 220 Summer       SW
## 221 Summer      SSW
## 222 Summer        S
## 223 Summer       NW
## 224 Summer        S
## 225 Summer       NW
## 226 Summer      NNW
## 227 Summer       NW
## 228 Summer        N
## 229 Summer       NE
## 230 Summer        S
## 231 Summer      WNW
## 232 Summer        S
## 233 Summer       NW
## 234 Summer       NW
## 235 Summer       NW
## 236 Summer       NW
## 237 Summer       NW
## 238 Summer        S
## 239 Summer      WNW
## 240 Summer       NW
## 241 Summer      NNW
## 242 Summer       NW
## 243 Summer       NW
## 244 Summer       NW
## 245 Summer      SSW
## 246 Summer        S
## 247 Summer       NW
## 248 Summer       NW
## 249 Summer      SSE
## 250 Summer        S
## 251 Summer       NW
## 252 Summer       NW
## 253 Summer        S
## 254 Summer      SSE
## 255 Summer      SSE
## 256 Summer        S
## 257 Summer      SSE
## 258 Summer       SE
## 259 Summer       SE
## 260 Summer      SSE
## 261 Summer      SSE
## 262 Summer      SSE
## 263 Summer      SSE
## 264 Autumn      NNE
## 265 Autumn      NNW
## 266 Autumn        N
## 267 Autumn      NNW
## 268 Autumn       NE
## 269 Autumn       NE
## 270 Autumn       NE
## 271 Autumn       NE
## 272 Autumn        N
## 273 Autumn      NNW
## 274 Autumn        N
## 275 Autumn       NE
## 276 Autumn       NW
## 277 Autumn       NW
## 278 Autumn      NNW
## 279 Autumn      SSE
## 280 Autumn      SSE
## 281 Autumn        S
## 282 Autumn      NNW
## 283 Autumn       NW
## 284 Autumn       SE
## 285 Autumn        E
## 286 Autumn       SE
## 287 Autumn       SE
## 288 Autumn      SSE
## 289 Autumn        S
## 290 Autumn      SSE
## 291 Autumn      SSE
## 292 Autumn       NE
## 293 Autumn       NE
## 294 Autumn      ESE
## 295 Autumn       NE
## 296 Autumn       NE
## 297 Autumn       NE
## 298 Autumn      ENE
## 299 Autumn       NE
## 300 Autumn       SE
## 301 Autumn      ESE
## 302 Autumn       SE
## 303 Autumn      SSE
## 304 Autumn      SSE
## 305 Autumn      NNW
## 306 Autumn      NNE
## 307 Autumn      NNW
## 308 Autumn      NNW
## 309 Autumn      NNW
## 310 Autumn       SE
## 311 Autumn       NW
## 312 Autumn       NW
## 313 Autumn      NNW
## 314 Autumn       SE
## 315 Autumn       SW
## 316 Autumn        S
## 317 Autumn        S
## 318 Autumn      SSE
## 319 Autumn      WSW
## 320 Autumn        W
## 321 Autumn      NNW
## 322 Autumn       SE
## 323 Autumn      ENE
## 324 Autumn        E
## 325 Autumn      ENE
## 326 Autumn      NNW
## 327 Autumn        N
## 328 Autumn        N
## 329 Autumn       NW
## 330 Autumn        E
## 331 Autumn        E
## 332 Autumn        E
## 333 Autumn        N
## 334 Autumn        N
## 335 Autumn      NNW
## 336 Autumn        N
## 337 Autumn      NNW
## 338 Autumn      NNW
## 339 Autumn      NNW
## 340 Autumn        N
## 341 Autumn      NNW
## 342 Autumn      NNW
## 343 Autumn      NNW
## 344 Autumn       NW
## 345 Autumn        N
## 346 Autumn       SE
## 347 Autumn        N
## 348 Autumn      NNW
## 349 Autumn       NW
## 350 Autumn       NW
## 351 Autumn       NW
## 352 Autumn      NNE
## 353 Autumn      ENE
## 354 Autumn      ENE
## 355 Winter        E
## 356 Winter        E
## 357 Winter        E
## 358 Winter        E
## 359 Winter      ENE
## 360 Winter      ENE
## 361 Winter      NNW
## 362 Winter       NW
## 363 Winter      ENE
## 364 Winter        E
## 365 Winter       NE
# Extract a single column as a vector with the $ operator
clima$ave.temp
##   [1] 13.4 13.5 13.6 11.3 13.0 13.1 13.5 14.1 12.9 11.0  9.5 12.1 10.2 12.8
##  [15] 12.0 10.8  9.3  9.0  8.7  9.2 11.0 11.0  9.5  9.4 12.4 11.9 10.3 10.6
##  [29]  8.9  8.6 12.1  9.3  8.3  8.9 10.3 11.6 12.5 11.3 10.7  9.9  8.8 10.4
##  [43] 11.2 13.5 12.2  9.6  7.3  8.9  9.4 10.8 11.7  9.5 11.5 12.1 10.6 11.7
##  [57] 10.9 12.2 11.8 12.2 12.1 10.1 10.8 12.1 11.2 14.2 16.5 17.2 17.4 17.2
##  [71] 16.1 12.4 12.6 14.2 16.1 12.5 13.3 16.4 12.9 12.3 10.9 10.2 11.3 11.3
##  [85]  9.7  9.6 10.5  9.4 10.8 13.0 12.4 12.2 11.6 12.3 14.4 14.4 14.5 16.3
##  [99] 20.7 16.9 15.7 16.7 17.0 19.5 18.7 16.4 18.4 14.2 13.6 14.3 13.5 15.1
## [113] 14.1 11.7 12.1 14.4 14.0 14.4 15.7 15.7 15.1 19.3 20.1 20.6 15.8 15.5
## [127] 15.9 15.5 16.6 15.9 15.9 15.7 17.4 21.7 23.4 22.6 21.2 18.4 13.6 11.8
## [141] 11.9 14.2 13.4 12.6 12.6 13.2 13.6 15.0 14.8 16.3 17.9 19.1 16.7 15.4
## [155] 15.0 15.5 16.1 16.8 17.2 17.1 17.0 19.9 24.1 23.0 26.2 25.7 24.7 24.0
## [169] 19.6 18.6 19.2 18.5 18.2 18.9 18.4 17.3 18.1 18.4 17.8 17.2 17.5 17.5
## [183] 18.6 20.7 18.2 18.3 18.0 16.9 18.3 22.5 24.3 23.8 22.9 20.3 20.7 19.6
## [197] 19.7 18.2 18.3 18.4 18.9 20.3 21.1 18.8 19.2 20.3 19.1 19.2 21.1 24.8
## [211] 26.6 20.8 18.5 18.4 18.4 18.2 19.7 20.3 21.5 21.4 21.0 20.5 19.3 18.6
## [225] 17.6 18.2 19.1 21.9 25.5 20.6 17.8 18.0 17.5 18.3 18.0 17.4 19.3 20.9
## [239] 22.1 20.2 19.3 20.3 20.2 23.2 21.6 21.1 20.9 19.9 20.8 20.4 21.2 21.5
## [253] 22.1 22.1 22.0 21.5 20.4 20.6 21.1 20.3 18.3 19.3 20.1 20.3 18.1 19.5
## [267] 19.4 20.3 20.6 19.8 18.4 17.6 18.7 22.4 22.8 20.6 18.9 17.4 16.8 19.1
## [281] 19.9 17.1 16.1 16.4 17.0 14.7 15.2 18.6 19.0 19.5 19.4 21.5 23.2 22.3
## [295] 21.6 22.0 22.3 23.2 20.1 19.0 19.4 20.2 20.5 19.7 16.9 13.7 14.6 11.1
## [309] 11.5 13.0 14.9 13.0 11.3 11.9 14.4 15.2 15.8 12.5 13.1 13.8 13.0 13.4
## [323] 14.3 16.8 17.3 14.7 14.1 15.7 13.9 12.3 13.1 11.6 11.6 14.7 13.4 11.1
## [337] 11.7  9.8  8.5  9.3  9.6  9.3  9.9  8.1  9.2 10.1 10.0  9.9 10.0 12.7
## [351] 12.0  9.0  7.6  9.5 11.3 10.1 10.1 10.9  9.5 10.0  8.3 11.5  8.4  9.3
## [365]  8.9
# The subset() command: keep the rows where ave.temp is above 25 and only
# the ave.temp and rain columns, then print the result
ndados <- subset(
  clima,
  subset = ave.temp > 25,
  select = c("ave.temp", "rain")
)
ndados
##     ave.temp rain
## 165     26.2    0
## 166     25.7    0
## 211     26.6    0
## 229     25.5    0
# 3.4 Criando os fatores

# Before: season is stored as 365 independent character strings
class(clima$season)
## [1] "character"
summary(clima$season)
##    Length     Class      Mode 
##       365 character character
# Convert season to a factor, fixing the order of the levels explicitly
clima$season <- factor(
  clima$season,
  levels = c("Spring", "Summer", "Autumn", "Winter")
)

# After: a factor with 4 categories, in the order given by "levels"
class(clima$season)
## [1] "factor"
summary(clima$season)
## Spring Summer Autumn Winter 
##     92     92     91     90
# Use as.factor() when the order of the levels does not matter: convert
# day, month and dir.wind in a single assignment
clima[c("day", "month", "dir.wind")] <-
  lapply(clima[c("day", "month", "dir.wind")], as.factor)