Cidades_digitais.utf8.md

# Análise das obras do governo sobre cidades digitais
# Silvio Cesar Lima
# Fonte:
# https://www.mctic.gov.br/mctic/opencms/indicadores/detalhe/Cidades-Digitais-Lista-de-Cidades-Atendidas-2.html

# Carregando os pacotes
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(data.table)

## 
## Attaching package: 'data.table'

## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

library(ggplot2)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(leaflet)
library(gmodels)


# Load the three input tables; text columns stay character (no factors)
df <- read.csv("cidades_digitais.csv", sep = ",", stringsAsFactors = FALSE)
regioes <- read.csv("REGIAO.csv", stringsAsFactors = FALSE)
LatLong <- read.csv("LatLong.csv", sep = ",", stringsAsFactors = FALSE)

# Join the tables on their shared key columns: first on CIDADE, then on UF.
# merge() keeps only rows that have a match in both tables.
# NOTE(review): CIDADE alone may not be unique across states - confirm that
# LatLong holds a single row per city name.
df_1 <- merge(x = df, y = LatLong, by = "CIDADE")
df <- merge(x = df_1, y = regioes, by = "UF")

# List the columns of the merged table before removing the unneeded one
names(df)

##  [1] "UF"                   "CIDADE"               "IBGE"                
##  [4] "POPULAÇÃO"            "VALOR_TOTAL_PREVISTO" "STATUS"              
##  [7] "PONTOS_ATENDIDOS"     "VALOR_INVESTIDO"      "LAT"                 
## [10] "LONG"                 "ESTADO"               "REGIAO"

# Drop the IBGE column from the merged table
df[["IBGE"]] <- NULL

# Structure overview of the resulting data frame
str(df)

## 'data.frame':    361 obs. of  11 variables:
##  $ UF                  : chr  "AC" "AL" "AL" "AM" ...
##  $ CIDADE              : chr  "Tarauacá" "Estrela de Alagoas" "Delmiro Gouveia" "Maraã" ...
##  $ POPULAÇÃO           : int  38819 18306 51997 18423 26777 37033 36435 28413 94175 45984 ...
##  $ VALOR_TOTAL_PREVISTO: num  484548 527207 1169382 427517 757707 ...
##  $ STATUS              : chr  "Sem Previsao" "Sem Previsao" "Sem Previsao" "Sem Previsao" ...
##  $ PONTOS_ATENDIDOS    : int  8 39 43 21 36 51 17 21 31 30 ...
##  $ VALOR_INVESTIDO     : num  0 0 0 0 0 ...
##  $ LAT                 : num  -8.16 -9.39 -9.39 -1.85 -3.83 ...
##  $ LONG                : num  -70.8 -36.8 -38 -65.6 -62.1 ...
##  $ ESTADO              : chr  "Acre" "Alagoas" "Alagoas" "Amazonas" ...
##  $ REGIAO              : chr  "Norte" "Nordeste" "Nordeste" "Norte" ...

# Which are the 10 largest planned investment values?
#
# Rows sorted by planned value, largest first
df_sort <- df[order(df$VALOR_TOTAL_PREVISTO, decreasing = TRUE), ]
head(df_sort[, c("UF", "CIDADE", "VALOR_TOTAL_PREVISTO")], n = 10)

##     UF                   CIDADE VALOR_TOTAL_PREVISTO
## 344 SP                    Tietê              1980603
## 358 TO     Paraíso do Tocantins              1872694
## 154 MS               Aquidauana              1796472
## 342 SP                    Jales              1705825
## 333 SP Espírito Santo do Pinhal              1661170
## 329 SP       Araçoiaba da Serra              1625188
## 357 SP                 Orlândia              1595135
## 347 SP                    Ibaté              1584847
## 352 SP     São Joaquim da Barra              1536582
## 327 SP     Vargem Grande do Sul              1525641

#
# Distribution of the planned values across value ranges
valor_previsto_obra <- df$VALOR_TOTAL_PREVISTO

# hist() draws the chart and also returns the bin details, so a single call
# provides both the plot and the object inspected below (the previous version
# called hist() twice and therefore drew the same chart twice).
histinfo <- hist(valor_previsto_obra)

# Histogram details: breaks, counts, density, mids
histinfo

## $breaks
##  [1]  200000  400000  600000  800000 1000000 1200000 1400000 1600000
##  [9] 1800000 2000000
## 
## $counts
## [1]  38 123  78  61  28  17  10   4   2
## 
## $density
## [1] 5.263158e-07 1.703601e-06 1.080332e-06 8.448753e-07 3.878116e-07
## [6] 2.354571e-07 1.385042e-07 5.540166e-08 2.770083e-08
## 
## $mids
## [1]  300000  500000  700000  900000 1100000 1300000 1500000 1700000 1900000
## 
## $xname
## [1] "valor_previsto_obra"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

#
# Number of values that fall in each histogram bin.
# Here, 123 planned values lie in the range between 400 and 600 thousand.
histinfo[["counts"]]

## [1]  38 123  78  61  28  17  10   4   2

#
## Which city has the largest planned value?
#
# Keep only the row(s) whose planned value equals the overall maximum
df_max_valor_previsto <- df %>%
  filter(VALOR_TOTAL_PREVISTO == max(valor_previsto_obra))
df_max_valor_previsto

##   UF CIDADE POPULAÇÃO VALOR_TOTAL_PREVISTO       STATUS PONTOS_ATENDIDOS
## 1 SP  Tietê     40194              1980603 Sem Previsao               44
##   VALOR_INVESTIDO      LAT     LONG    ESTADO  REGIAO
## 1               0 -23.1101 -47.7164 São Paulo Sudeste

#
## How many cities are planned per region?
#
# Count the rows in each REGIAO, rename the count column, sort descending
df_n <- as.data.frame(tally(group_by(df, REGIAO)))
setnames(df_n, old = "n", new = "Total_cidades")
df_Reg_cidades <- df_n[order(df_n$Total_cidades, decreasing = TRUE), ]
df_Reg_cidades

##         REGIAO Total_cidades
## 2     Nordeste           168
## 4      Sudeste            85
## 3        Norte            48
## 5          Sul            43
## 1 Centro-oeste            17

#
## How many cities are planned per state (UF)?
#
# Count the rows in each UF, rename the count column, sort descending
df_n <- as.data.frame(tally(group_by(df, UF)))
setnames(df_n, old = "n", new = "Total_cidades")
df_UF_cidades <- df_n[order(df_n$Total_cidades, decreasing = TRUE), ]
df_UF_cidades

##    UF Total_cidades
## 25 SP            36
## 5  BA            34
## 6  CE            29
## 9  MA            29
## 10 MG            28
## 13 PA            24
## 15 PE            21
## 17 PR            21
## 14 PB            19
## 16 PI            19
## 18 RJ            17
## 22 RS            16
## 8  GO            10
## 19 RN            10
## 3  AM             9
## 4  AP             6
## 23 SC             6
## 24 SE             5
## 7  ES             4
## 12 MT             4
## 26 TO             4
## 11 MS             3
## 20 RO             3
## 2  AL             2
## 1  AC             1
## 21 RR             1

#
## How many cities are there per construction status?
#
table(df[["STATUS"]])

## 
##    Concluida Em andamento Sem Previsao 
##           74           13          274

#
# Cross-tabulation of region (rows) against construction status (columns).
# The printed table is labelled with the argument expressions exactly as
# written ("df$REGIAO", "df$STATUS"), so the call is left untouched.
CrossTable(df$REGIAO, df$STATUS)

## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  361 
## 
##  
##              | df$STATUS 
##    df$REGIAO |    Concluida | Em andamento | Sem Previsao |    Row Total | 
## -------------|--------------|--------------|--------------|--------------|
## Centro-oeste |            0 |            0 |           17 |           17 | 
##              |        3.485 |        0.612 |        1.301 |              | 
##              |        0.000 |        0.000 |        1.000 |        0.047 | 
##              |        0.000 |        0.000 |        0.062 |              | 
##              |        0.000 |        0.000 |        0.047 |              | 
## -------------|--------------|--------------|--------------|--------------|
##     Nordeste |           36 |            4 |          128 |          168 | 
##              |        0.071 |        0.695 |        0.002 |              | 
##              |        0.214 |        0.024 |        0.762 |        0.465 | 
##              |        0.486 |        0.308 |        0.467 |              | 
##              |        0.100 |        0.011 |        0.355 |              | 
## -------------|--------------|--------------|--------------|--------------|
##        Norte |           13 |            0 |           35 |           48 | 
##              |        1.015 |        1.729 |        0.056 |              | 
##              |        0.271 |        0.000 |        0.729 |        0.133 | 
##              |        0.176 |        0.000 |        0.128 |              | 
##              |        0.036 |        0.000 |        0.097 |              | 
## -------------|--------------|--------------|--------------|--------------|
##      Sudeste |           11 |            5 |           69 |           85 | 
##              |        2.368 |        1.228 |        0.312 |              | 
##              |        0.129 |        0.059 |        0.812 |        0.235 | 
##              |        0.149 |        0.385 |        0.252 |              | 
##              |        0.030 |        0.014 |        0.191 |              | 
## -------------|--------------|--------------|--------------|--------------|
##          Sul |           14 |            4 |           25 |           43 | 
##              |        3.051 |        3.881 |        1.787 |              | 
##              |        0.326 |        0.093 |        0.581 |        0.119 | 
##              |        0.189 |        0.308 |        0.091 |              | 
##              |        0.039 |        0.011 |        0.069 |              | 
## -------------|--------------|--------------|--------------|--------------|
## Column Total |           74 |           13 |          274 |          361 | 
##              |        0.205 |        0.036 |        0.759 |              | 
## -------------|--------------|--------------|--------------|--------------|
## 
##

#
# Share of each construction status among the cities,
# expressed as a percentage rounded to one decimal place
status_table <- round(prop.table(table(df$STATUS)) * 100, digits = 1)
status_table

## 
##    Concluida Em andamento Sem Previsao 
##         20.5          3.6         75.9

#
# Convert the table to a data frame and give its columns descriptive names
df_status0 <- data.frame(status_table)
setnames(df_status0, old = c("Var1", "Freq"), new = c("Status", "Percentual"))
# df_status1 and df_status remain available as names for the renamed table
df_status1 <- df_status0
df_status <- df_status1
# Rows ordered from the largest percentage to the smallest
df_status_sort <- df_status[order(df_status$Percentual, decreasing = TRUE), ]
#
## What is the percentage of each construction status?
#
# The levels of Status are set in descending order of the numeric
# Percentual column (intended to control the order of the bars)
df_status_sort$Status <- with(
  df_status_sort,
  factor(Status, levels = unique(Status)[order(Percentual, decreasing = TRUE)])
)

# Bar chart of the percentages
plot_ly(df_status_sort, x = ~Status, y = ~Percentual, type = "bar") %>%
  layout(
    title = "Status e Percentual das obras",
    xaxis = list(title = "Status"),
    yaxis = list(title = "Percentual (%)")
  )

#
## How many cities are there per region?
#
# City count for each region
df_reg_cid <- df %>%
  select(CIDADE, REGIAO) %>%
  group_by(REGIAO) %>%
  summarise(Total = n())
head(df_reg_cid)

## # A tibble: 5 x 2
##   REGIAO       Total
##   <chr>        <int>
## 1 Centro-oeste    17
## 2 Nordeste       168
## 3 Norte           48
## 4 Sudeste         85
## 5 Sul             43

#
# Regions sorted by their city count, largest first
df_reg_cid_sort <- df_reg_cid[order(-df_reg_cid$Total), ]
head(df_reg_cid_sort)

## # A tibble: 5 x 2
##   REGIAO       Total
##   <chr>        <int>
## 1 Nordeste       168
## 2 Sudeste         85
## 3 Norte           48
## 4 Sul             43
## 5 Centro-oeste    17

# 
# The levels of REGIAO are set in descending order of the numeric Total column
df_reg_cid_sort$REGIAO <- with(
  df_reg_cid_sort,
  factor(REGIAO, levels = unique(REGIAO)[order(Total, decreasing = TRUE)])
)

# Bar chart of the city count per region
plot_ly(df_reg_cid_sort, x = ~REGIAO, y = ~Total, type = "bar") %>%
  layout(
    title = "Total cidades por regiao",
    xaxis = list(title = "Regiao"),
    yaxis = list(title = "Total")
  )

#
#
## What are the planned investment amounts per region?
#
# Sum of the planned values within each region
df_regiao_inv <- summarise(
  group_by(select(df, REGIAO, VALOR_TOTAL_PREVISTO), REGIAO),
  Total = sum(VALOR_TOTAL_PREVISTO)
)

# Largest totals first
df_regiao_inv_sort <- df_regiao_inv[order(-df_regiao_inv$Total), ]

# The levels of REGIAO are set in descending order of the numeric Total column
df_regiao_inv_sort$REGIAO <- with(
  df_regiao_inv_sort,
  factor(REGIAO, levels = unique(REGIAO)[order(Total, decreasing = TRUE)])
)

# Bar chart of the planned investment per region
plot_ly(df_regiao_inv_sort, x = ~REGIAO, y = ~Total, type = "bar") %>%
  layout(
    title = "Total previsto de investimento por regiao",
    xaxis = list(title = "Regiao"),
    yaxis = list(title = "Total (R$)")
  )

## BIBLIOTECA LEAFLET
#
# Mapa do Brasil e os status das obras nas cidades

# Passos necessários para gerar um mapa customizado com leaflet
# Vectorised switch: maps each status label to a numeric code.
#   "Sem Previsao" -> 0, "Em andamento" -> 1, "Concluida" -> 2
# Any other label maps to NA. Without this default, switch() would yield NULL
# for an unknown label and the vectorised result would become a list instead
# of a numeric vector.
#
# a: vector of status labels (converted with as.character()).
# Returns a numeric vector with one code per element of `a`.
sit <- Vectorize(function(a) {
  switch(as.character(a),
    "Sem Previsao" = 0,
    "Em andamento" = 1,
    "Concluida" = 2,
    NA_real_
  )
}, "a")

# sit() is already vectorised over its argument, so it is applied to the whole
# STATUS column in one call (no extra sapply() layer); unname() keeps the new
# column free of element names.
df$stat <- unname(sit(df$STATUS))

# Marker colour for each row, chosen from the numeric status code in `stat`:
#   0 -> "red", 1 -> "green", 2 -> "blue"
# Codes outside this set (including NA) get "gray" instead of raising an
# error or producing a list, so the result is always a character vector with
# one colour per row.
#
# df: data frame with a `stat` column of status codes.
# Returns an unnamed character vector of colour names.
getColor <- function(df) {
  palette <- c("0" = "red", "1" = "green", "2" = "blue")
  colours <- unname(palette[as.character(df$stat)])
  colours[is.na(colours)] <- "gray"
  colours
}

# Icon set: same glyph for every marker, marker colour taken from getColor()
icons <- awesomeIcons(
  icon = "ios-close",
  library = "ion",
  markerColor = getColor(df),
  iconColor = "black"
)
# Mapa default
#map3 = leaflet(df) %>% addTiles()%>%
#  addMarkers(~long, ~lat,popup = (df$STATUS),label=df$CIDADE)
#map3

# Customised map: one coloured marker per row, with the status as popup
# and the city name as label
map4 <- addAwesomeMarkers(
  addTiles(leaflet(df)),
  lng = ~LONG,
  lat = ~LAT,
  icon = icons,
  popup = df$STATUS,
  label = df$CIDADE
)

# Show the map with a legend pairing each marker colour with its status
addLegend(
  map4,
  position = "topright",
  colors = c("green", "red", "blue"),
  labels = c("Em andamento", "Sem Previsao", "Concluida"),
  opacity = 0.75,
  title = "Legenda"
)

Cidades_digitais.R

Silvio Lima

2019-04-15