Ir para Fase 2.

Este é um R Markdown Notebook. Quando uma aplicação é executada com esta tecnologia, o código da aplicação e suas saídas são exibidos em um arquivo estático.

Preparamos o ambiente

# Session setup: make sure GC torture mode is off, wipe the global workspace
# (including dot-prefixed objects) and attach the mapping and clustering
# packages used below.
gctorture (FALSE)
rm(list = ls(all.names = TRUE))
library(leaflet.extras)
## Loading required package: leaflet
library(apcluster)
## 
## Attaching package: 'apcluster'
## The following object is masked from 'package:stats':
## 
##     heatmap
# Load the saved objects from the project folder. The calls that follow use
# `x2` and `apres`, so these two .rda files are expected to provide them
# (presumably apres2.rda -> apres and x2-13000.rda -> x2; confirm).
setwd("~/OneDrive/r-files/AffinityPropagationClustering/")
load(file = "apres2.rda")
load(file = "x2-13000.rda")
# Quick sanity checks; per the recorded output `x2` is a 13000 x 2 matrix and
# `apres` is an S4 APResult object.
head(x2)
##           [,1]      [,2]
## [1,] -51.18575 -30.04310
## [2,] -51.15011 -30.02718
## [3,] -51.18697 -30.06465
## [4,] -51.20797 -30.14895
## [5,] -51.20896 -30.03037
## [6,] -51.20490 -29.98931
dim(x2)
## [1] 13000     2
summary(apres)
##   Length    Class     Mode 
##      113 APResult       S4

Plotamos o modelo

# Plot the clustering result `apres` together with the data matrix `x2`.
plot(apres, x2)

Criamos a função para classificar novas entradas a partir do modelo

# Assign each row of `newdata` to its most similar exemplar.
#
# Arguments:
#   s         similarity function called as s(x, sel = cols); it must return a
#             matrix with one row per row of `x` and one column per index in
#             `sel` (the form used in this file is negDistMat(r = 2)).
#   exemplars matrix/data frame with one exemplar per row.
#   newdata   matrix/data frame with one observation per row and the same
#             columns as `exemplars`.
#
# Returns an unnamed integer vector with one entry per row of `newdata`: the
# row index (within `exemplars`) of the exemplar with the highest similarity.
# Returns integer(0) when `newdata` has no rows.
predict.apcluster <- function(s, exemplars, newdata)
{
  n_exemplars <- nrow(exemplars)
  n_new <- nrow(newdata)
  if (n_new == 0) {
    return(integer(0))
  }

  # Similarities are computed on the stacked data, but only for the columns
  # that correspond to the new observations.
  new_cols <- seq_len(n_new) + n_exemplars
  simMat <- s(rbind(exemplars, newdata), sel = new_cols)

  # Keep only the exemplar rows. drop = FALSE keeps the result a matrix even
  # for a single new observation; otherwise apply() below would fail.
  simMat <- simMat[seq_len(n_exemplars), , drop = FALSE]

  unname(apply(simMat, 2, which.max))
}

Inicializamos a lista que conterá os resultados

# Placeholder for the prediction results. Note that the batch loop further down
# reassigns `resultado` with a plain vector on every iteration.
resultado <- list()

Verificamos as bases de informações e as carregamos

# Work from the project directory and list the CSV exports stored in geo/.
# Only *.csv entries are kept, so stray files or sub-directories in that
# folder never reach read.csv() when the files are loaded.
# `filenames` holds bare file names (no directory part).
setwd("~/Documents/r-files/AffinityPropagationClustering/")
filenames <- list.files(
  path = "~/Documents/r-files/AffinityPropagationClustering/geo/",
  pattern = "\\.csv$",
  ignore.case = TRUE
)
filenames
##  [1] "comercio_1524507845.62241.csv" "comercio_1524572937.51169.csv"
##  [3] "comercio_1524582351.75634.csv" "comercio_1524590349.33645.csv"
##  [5] "comercio_1524592829.08411.csv" "comercio_1524684739.93334.csv"
##  [7] "comercio_1524741390.87933.csv" "comercio_1524744606.04672.csv"
##  [9] "comercio_1524749867.03171.csv" "comercio_1524755831.16559.csv"
## [11] "servico_1524761096.81902.csv"  "servico_1524837368.42785.csv" 
## [13] "servico_1524849869.27125.csv"  "servico_1524856659.15295.csv" 
## [15] "servico_1524857271.54193.csv"  "servico_1525092736.75057.csv" 
## [17] "servico_1525100217.34111.csv"  "servico_1525108125.05884.csv" 
## [19] "servico_1525271712.90871.csv"  "servico_1525357143.86729.csv" 
## [21] "servico_1525377708.98704.csv"  "servico_1525431536.65195.csv" 
## [23] "servico_1525438499.1497.csv"
# Read every listed CSV (semicolon-separated, with header) and stack them into
# a single data frame. Full paths are built with file.path() instead of
# switching the working directory into geo/ and back, so a failed read cannot
# leave the session in the wrong directory.
data <- do.call(
  "rbind",
  lapply(
    file.path("~/Documents/r-files/AffinityPropagationClustering/geo", filenames),
    read.csv,
    header = TRUE,
    sep = ";"
  )
)
head(data)
names(data)
## [1] "lat"               "long"              "accuracy"         
## [4] "formatted_address" "index"             "tipo"             
## [7] "default"
# Build the working table directly as a data frame instead of going through a
# character matrix (cbind() of mixed types coerces every column to text).
# Column names V1..V4 are kept because the rest of the notebook relies on them:
#   V1 = longitude, V2 = latitude, V3 = `default`, V4 = `tipo`.
dados <- data.frame(
  V1 = as.numeric(as.character(data$long)),
  V2 = as.numeric(as.character(data$lat)),
  V3 = as.character(data$default),
  V4 = as.character(data$tipo),
  stringsAsFactors = FALSE
)
dim(dados)
## [1] 106079      4
# Keep only rows that have both coordinates and a negative latitude. The NA
# checks are explicit so that missing values never produce all-NA rows.
dados <- dados[!is.na(dados$V1) & !is.na(dados$V2) & dados$V2 < 0, ]
dim(dados)
## [1] 103041      4
# Cluster id of each row; 0 means "not classified yet".
dados$cluster <- 0
head(dados)
tail(dados)
# Copy of the table used as input for the batch prediction loop.
dadosfim <- dados
head(dadosfim)
# Row count in thousands, plus one (printed for reference only).
aa <- nrow(dadosfim) / 1000 + 1
aa
## [1] 104.041
# Classify every row of `dadosfim` in batches and store the resulting cluster
# ids in `dados$cluster`.
#
# The batch starts are derived from the row count so that the final, partial
# batch is processed too. The previous upper bound (length - 1000) left the
# trailing rows with cluster 0.
n_linhas <- nrow(dadosfim)
tamanho_lote <- 1000
inicios <- (seq_len(ceiling(n_linhas / tamanho_lote)) - 1) * tamanho_lote + 1

# Loop-invariant inputs: the similarity function and the exemplar coordinates.
similaridade <- negDistMat(r = 2)
exemplares <- x2[apres@exemplars, ]

for (inicio in inicios) {
  final <- min(inicio + tamanho_lote - 1, n_linhas)
  # sprintf("%d") keeps the progress label in plain digits (no "1e+05").
  print(sprintf("%d - %d", inicio, final))
  resultado <- predict.apcluster(similaridade, exemplares, dadosfim[inicio:final, 1:2])
  dados$cluster[inicio:final] <- resultado
}
## [1] "1 - 1000"
## [1] "1001 - 2000"
## [1] "2001 - 3000"
## [1] "3001 - 4000"
## [1] "4001 - 5000"
## [1] "5001 - 6000"
## [1] "6001 - 7000"
## [1] "7001 - 8000"
## [1] "8001 - 9000"
## [1] "9001 - 10000"
## [1] "10001 - 11000"
## [1] "11001 - 12000"
## [1] "12001 - 13000"
## [1] "13001 - 14000"
## [1] "14001 - 15000"
## [1] "15001 - 16000"
## [1] "16001 - 17000"
## [1] "17001 - 18000"
## [1] "18001 - 19000"
## [1] "19001 - 20000"
## [1] "20001 - 21000"
## [1] "21001 - 22000"
## [1] "22001 - 23000"
## [1] "23001 - 24000"
## [1] "24001 - 25000"
## [1] "25001 - 26000"
## [1] "26001 - 27000"
## [1] "27001 - 28000"
## [1] "28001 - 29000"
## [1] "29001 - 30000"
## [1] "30001 - 31000"
## [1] "31001 - 32000"
## [1] "32001 - 33000"
## [1] "33001 - 34000"
## [1] "34001 - 35000"
## [1] "35001 - 36000"
## [1] "36001 - 37000"
## [1] "37001 - 38000"
## [1] "38001 - 39000"
## [1] "39001 - 40000"
## [1] "40001 - 41000"
## [1] "41001 - 42000"
## [1] "42001 - 43000"
## [1] "43001 - 44000"
## [1] "44001 - 45000"
## [1] "45001 - 46000"
## [1] "46001 - 47000"
## [1] "47001 - 48000"
## [1] "48001 - 49000"
## [1] "49001 - 50000"
## [1] "50001 - 51000"
## [1] "51001 - 52000"
## [1] "52001 - 53000"
## [1] "53001 - 54000"
## [1] "54001 - 55000"
## [1] "55001 - 56000"
## [1] "56001 - 57000"
## [1] "57001 - 58000"
## [1] "58001 - 59000"
## [1] "59001 - 60000"
## [1] "60001 - 61000"
## [1] "61001 - 62000"
## [1] "62001 - 63000"
## [1] "63001 - 64000"
## [1] "64001 - 65000"
## [1] "65001 - 66000"
## [1] "66001 - 67000"
## [1] "67001 - 68000"
## [1] "68001 - 69000"
## [1] "69001 - 70000"
## [1] "70001 - 71000"
## [1] "71001 - 72000"
## [1] "72001 - 73000"
## [1] "73001 - 74000"
## [1] "74001 - 75000"
## [1] "75001 - 76000"
## [1] "76001 - 77000"
## [1] "77001 - 78000"
## [1] "78001 - 79000"
## [1] "79001 - 80000"
## [1] "80001 - 81000"
## [1] "81001 - 82000"
## [1] "82001 - 83000"
## [1] "83001 - 84000"
## [1] "84001 - 85000"
## [1] "85001 - 86000"
## [1] "86001 - 87000"
## [1] "87001 - 88000"
## [1] "88001 - 89000"
## [1] "89001 - 90000"
## [1] "90001 - 91000"
## [1] "91001 - 92000"
## [1] "92001 - 93000"
## [1] "93001 - 94000"
## [1] "94001 - 95000"
## [1] "95001 - 96000"
## [1] "96001 - 97000"
## [1] "97001 - 98000"
## [1] "98001 - 99000"
## [1] "99001 - 1e+05"
## [1] "100001 - 101000"
## [1] "101001 - 102000"
## [1] "102001 - 103000"
# `resultado` is reassigned on every loop iteration, so this is the size of the
# last batch's prediction vector, not the total number of classified rows.
length(resultado)
## [1] 1000
# Show the first rows of `dados`, including the `cluster` column filled above.
head(dados)
# Draw a leaflet map with the points that belong to one cluster.
#
# Arguments:
#   cluster  cluster id to display; rows are selected with
#            dados$cluster == cluster.
#
# Reads the global data frame `dados` (V1 = longitude, V2 = latitude,
# `cluster` = assigned cluster id).
#
# Returns the leaflet widget: OSM tiles, one blue circle per point, and a
# legend showing the number of points and the cluster id.
meucluster <- function(cluster) {
  dadosc <- dados[dados$cluster == cluster, ]
  tamanho <- nrow(dadosc)
  leaflet(dadosc) %>%
    addTiles(group = "OSM") %>%
    addCircles(~V1, ~V2, weight = 0.1, radius = 8, color = 'blue',
               stroke = TRUE, fillOpacity = 0.8) %>%
    # The title uses the `cluster` argument. It previously read the global
    # counter `a`, which mislabelled every call made with an explicit id.
    addLegend("topright", colors = "blue",
              labels = paste("com", tamanho, "alvaras", sep = " "),
              title = paste("Cluster nº", cluster, sep = " "))
}

# Render the map of individual clusters. The first call increments the global
# counter `a` (0 -> 1) inside the argument and passes the new value as the
# cluster id; the remaining calls use hand-picked cluster ids.
a = 0
meucluster((a=a+1))
meucluster(6)
meucluster(10)
meucluster(12)
meucluster(13)
meucluster(14)
meucluster(17)
meucluster(19)
meucluster(34)
meucluster(36)
meucluster(45)
meucluster(47)
meucluster(68)
meucluster(70)
# Manual inspection of a single cluster: print its rows and how many there are.
head(dados)
cluster <- 2
pertence <- dados$cluster == cluster
dadosc <- dados[pertence, ]
dadosc
# Number of rows selected for this cluster.
tamanho <- length(dados$cluster[pertence])
tamanho
## [1] 1123

Para facilitar a navegação, recomenda-se desligar o layer de clusters antes de arrastar ou dar zoom no mapa e religá-lo após posicionar o mapa como desejado.

# Colour scale with one colour per cluster id.
# The eight ColorBrewer "Dark2" colours are passed explicitly rather than by
# palette name. With more than eight levels the name triggers RColorBrewer's
# "n too large" warning, whereas colorFactor() interpolates an explicit colour
# vector across all levels without warning.
dark2 <- c("#1B9E77", "#D95F02", "#7570B3", "#E7298A",
           "#66A61E", "#E6AB02", "#A6761D", "#666666")
pal <- colorFactor(
  palette = dark2,
  domain = dados$cluster
)

# Full map: every point coloured by its cluster, with a popup showing the
# cluster id, a text-only legend (empty colour swatch) showing the number of
# clusters, and a layer control to toggle each group.
leaflet(dados) %>%
  addTiles(group = "Mapa") %>%
  addCircles(group = "Clusters", ~V1, ~V2, weight = 0.1, radius = 30,
             color = ~pal(cluster), stroke = TRUE, fillOpacity = 0.8,
             popup = ~paste("Cluster: ", cluster, sep = " ")) %>%
  addLegend(group = "Legenda", "topright", colors = "",
            labels = paste(summary(apres)[1], "Clusters"),
            title = "Alvaras em Porto Alegre") %>%
  addLayersControl(overlayGroups = c("Mapa", "Clusters", "Legenda"),
                   options = layersControlOptions(collapsed = TRUE))
## Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors