Esta é uma aplicação R Markdown Notebook. Quando uma aplicação é executada com esta tecnologia, o código da aplicação e suas saídas são exibidos em um arquivo estático.
Preparamos o ambiente
# Make sure gctorture mode (forced garbage collection on every allocation) is off.
gctorture (FALSE)
# Start from a clean environment: remove every object, including hidden ones.
rm(list = ls(all.names = TRUE))
# Mapping (leaflet is attached as a dependency) and affinity propagation clustering.
library(leaflet.extras)
## Loading required package: leaflet
library(apcluster)
##
## Attaching package: 'apcluster'
## The following object is masked from 'package:stats':
##
## heatmap
# Move to the directory holding the saved model files; the relative paths below
# are resolved against it.
setwd("~/OneDrive/r-files/AffinityPropagationClustering/")
# Restore previously saved objects. NOTE(review): presumably these files provide
# `apres` and `x2`, which are used right below -- confirm.
load(file = "apres2.rda")
load(file = "x2-13000.rda")
# Preview the first rows of the point matrix.
head(x2)
## [,1] [,2]
## [1,] -51.18575 -30.04310
## [2,] -51.15011 -30.02718
## [3,] -51.18697 -30.06465
## [4,] -51.20797 -30.14895
## [5,] -51.20896 -30.03037
## [6,] -51.20490 -29.98931
# Size of the point matrix (rows x columns).
dim(x2)
## [1] 13000 2
# Summary of the clustering result object.
summary(apres)
## Length Class Mode
## 113 APResult S4
Plotamos o modelo
# Plot the clustering result `apres` together with the data points in `x2`.
plot(apres, x2)
Criamos a função para classificar novas entradas a partir do modelo
# Assign each row of `newdata` to its most similar exemplar.
#
# Args:
#   s:         similarity function called as s(x, sel = idx). It must return a
#              matrix with one row per row of x and one column per index in
#              `sel` (e.g. the function returned by negDistMat(r = 2), which is
#              what this notebook passes in).
#   exemplars: matrix or data frame with one exemplar per row.
#   newdata:   matrix or data frame with one point to classify per row.
#
# Returns:
#   An unnamed integer vector with one entry per row of `newdata`: the row
#   index, within `exemplars`, of the exemplar with the highest similarity.
predict.apcluster <- function(s, exemplars, newdata)
{
  n_exemplars <- nrow(exemplars)
  n_new <- nrow(newdata)

  # Nothing to classify: return an empty result instead of building the
  # descending index 1:0.
  if (n_new == 0L) {
    return(integer(0))
  }

  # Similarity of every stacked row against the new points only. The first
  # n_exemplars rows hold the exemplar-vs-new-point similarities.
  sim_all <- s(rbind(exemplars, newdata), sel = seq_len(n_new) + n_exemplars)

  # drop = FALSE keeps a matrix even with a single exemplar or a single new
  # point; a plain vector here would make apply() fail.
  simMat <- sim_all[seq_len(n_exemplars), , drop = FALSE]

  # For each new point (column), pick the exemplar (row) with the largest value.
  unname(apply(simMat, 2, which.max))
}
Inicializamos a lista que conterá os resultados
# Initialize `resultado` as an empty list.
resultado <- list()
Verificamos as bases de informações e as carregamos
# Switch to the project directory and collect the names of the files in geo/.
setwd("~/Documents/r-files/AffinityPropagationClustering/")
filenames <- list.files(path = "~/Documents/r-files/AffinityPropagationClustering/geo/")
# Print the names so the set of input files can be checked.
filenames
## [1] "comercio_1524507845.62241.csv" "comercio_1524572937.51169.csv"
## [3] "comercio_1524582351.75634.csv" "comercio_1524590349.33645.csv"
## [5] "comercio_1524592829.08411.csv" "comercio_1524684739.93334.csv"
## [7] "comercio_1524741390.87933.csv" "comercio_1524744606.04672.csv"
## [9] "comercio_1524749867.03171.csv" "comercio_1524755831.16559.csv"
## [11] "servico_1524761096.81902.csv" "servico_1524837368.42785.csv"
## [13] "servico_1524849869.27125.csv" "servico_1524856659.15295.csv"
## [15] "servico_1524857271.54193.csv" "servico_1525092736.75057.csv"
## [17] "servico_1525100217.34111.csv" "servico_1525108125.05884.csv"
## [19] "servico_1525271712.90871.csv" "servico_1525357143.86729.csv"
## [21] "servico_1525377708.98704.csv" "servico_1525431536.65195.csv"
## [23] "servico_1525438499.1497.csv"
# Read every CSV listed in `filenames` from the geo/ directory and stack the
# rows into one data frame. The files are semicolon-separated with a header.
# Full paths are built with file.path() instead of changing the working
# directory around the read, so a failed read cannot leave the session
# stranded in geo/. The working directory stays at the project directory that
# was set before the files were listed.
geo_dir <- "~/Documents/r-files/AffinityPropagationClustering/geo"
data <- do.call(
  "rbind",
  lapply(file.path(geo_dir, filenames), read.csv, header = TRUE, sep = ";")
)
# Preview the first rows and the column names of the combined table.
head(data)
names(data)
## [1] "lat" "long" "accuracy"
## [4] "formatted_address" "index" "tipo"
## [7] "default"
# Build the working table directly as a data frame:
#   V1 = long, V2 = lat, V3 = default, V4 = tipo.
# The coordinates are converted through as.character() so that text or factor
# input is parsed by its label. Going through cbind() would first coerce
# everything into a character matrix and turn factors into integer codes.
dados <- data.frame(
  V1 = as.numeric(as.character(data$long)),
  V2 = as.numeric(as.character(data$lat)),
  V3 = as.character(data$default),
  V4 = as.character(data$tipo)
)
dim(dados)
## [1] 106079 4
# Keep rows with a known longitude and a negative latitude. subset() treats an
# NA condition as FALSE, so rows with a missing latitude are dropped here
# rather than becoming all-NA rows.
dados <- subset(dados, !is.na(V1) & V2 < 0)
dim(dados)
## [1] 103041 4
# Cluster label column; 0 until a label is assigned.
dados$cluster <- 0
head(dados)
tail(dados)
# Copy of the full table that is used as input for the classification.
dadosfim <- dados
head(dadosfim)
# Row count expressed in thousands, plus one.
aa <- nrow(dadosfim) / 1000 + 1
aa
## [1] 104.041
# Classify every row of `dadosfim` in batches of `tamanho_lote` rows and store
# the exemplar index returned by predict.apcluster() in dados$cluster.
# The batch count is rounded up so the trailing partial batch is classified
# too; a loop bounded by (row count - 1000) leaves those last rows at 0.
# seq_len() also makes the loop a no-op for an empty table.
n_linhas <- nrow(dadosfim)
tamanho_lote <- 1000
for (lote in seq_len(ceiling(n_linhas / tamanho_lote))) {
  inicio <- (lote - 1) * tamanho_lote + 1
  # Clamp the end of the last batch to the number of rows.
  final <- min(inicio + tamanho_lote - 1, n_linhas)
  # Progress message: first and last row of the current batch.
  print(paste(inicio, final, sep = " - "))
  # Columns 1:2 are the coordinates; exemplars are the rows of x2 selected by
  # apres@exemplars.
  resultado <- predict.apcluster(negDistMat(r = 2), x2[apres@exemplars, ], dadosfim[inicio:final, 1:2])
  dados$cluster[inicio:final] <- resultado
}
# After the loop `resultado` holds the labels of the last batch only.
## [1] "1 - 1000"
## [1] "1001 - 2000"
## [1] "2001 - 3000"
## [1] "3001 - 4000"
## [1] "4001 - 5000"
## [1] "5001 - 6000"
## [1] "6001 - 7000"
## [1] "7001 - 8000"
## [1] "8001 - 9000"
## [1] "9001 - 10000"
## [1] "10001 - 11000"
## [1] "11001 - 12000"
## [1] "12001 - 13000"
## [1] "13001 - 14000"
## [1] "14001 - 15000"
## [1] "15001 - 16000"
## [1] "16001 - 17000"
## [1] "17001 - 18000"
## [1] "18001 - 19000"
## [1] "19001 - 20000"
## [1] "20001 - 21000"
## [1] "21001 - 22000"
## [1] "22001 - 23000"
## [1] "23001 - 24000"
## [1] "24001 - 25000"
## [1] "25001 - 26000"
## [1] "26001 - 27000"
## [1] "27001 - 28000"
## [1] "28001 - 29000"
## [1] "29001 - 30000"
## [1] "30001 - 31000"
## [1] "31001 - 32000"
## [1] "32001 - 33000"
## [1] "33001 - 34000"
## [1] "34001 - 35000"
## [1] "35001 - 36000"
## [1] "36001 - 37000"
## [1] "37001 - 38000"
## [1] "38001 - 39000"
## [1] "39001 - 40000"
## [1] "40001 - 41000"
## [1] "41001 - 42000"
## [1] "42001 - 43000"
## [1] "43001 - 44000"
## [1] "44001 - 45000"
## [1] "45001 - 46000"
## [1] "46001 - 47000"
## [1] "47001 - 48000"
## [1] "48001 - 49000"
## [1] "49001 - 50000"
## [1] "50001 - 51000"
## [1] "51001 - 52000"
## [1] "52001 - 53000"
## [1] "53001 - 54000"
## [1] "54001 - 55000"
## [1] "55001 - 56000"
## [1] "56001 - 57000"
## [1] "57001 - 58000"
## [1] "58001 - 59000"
## [1] "59001 - 60000"
## [1] "60001 - 61000"
## [1] "61001 - 62000"
## [1] "62001 - 63000"
## [1] "63001 - 64000"
## [1] "64001 - 65000"
## [1] "65001 - 66000"
## [1] "66001 - 67000"
## [1] "67001 - 68000"
## [1] "68001 - 69000"
## [1] "69001 - 70000"
## [1] "70001 - 71000"
## [1] "71001 - 72000"
## [1] "72001 - 73000"
## [1] "73001 - 74000"
## [1] "74001 - 75000"
## [1] "75001 - 76000"
## [1] "76001 - 77000"
## [1] "77001 - 78000"
## [1] "78001 - 79000"
## [1] "79001 - 80000"
## [1] "80001 - 81000"
## [1] "81001 - 82000"
## [1] "82001 - 83000"
## [1] "83001 - 84000"
## [1] "84001 - 85000"
## [1] "85001 - 86000"
## [1] "86001 - 87000"
## [1] "87001 - 88000"
## [1] "88001 - 89000"
## [1] "89001 - 90000"
## [1] "90001 - 91000"
## [1] "91001 - 92000"
## [1] "92001 - 93000"
## [1] "93001 - 94000"
## [1] "94001 - 95000"
## [1] "95001 - 96000"
## [1] "96001 - 97000"
## [1] "97001 - 98000"
## [1] "98001 - 99000"
## [1] "99001 - 1e+05"
## [1] "100001 - 101000"
## [1] "101001 - 102000"
## [1] "102001 - 103000"
# Number of labels returned by the most recent predict.apcluster() call.
length(resultado)
## [1] 1000
# Preview the table, which now carries the assigned `cluster` values.
head(dados)
# Draw a leaflet map showing the points of a single cluster.
#
# Args:
#   cluster: cluster id to display; compared against dados$cluster.
#
# Returns:
#   The leaflet map widget: OSM tiles, one blue circle per point, and a legend
#   with the cluster id and the number of points in it.
#
# Reads the global data frame `dados` (V1 = longitude, V2 = latitude, cluster).
meucluster <- function(cluster) {
  dadosc <- dados[dados$cluster == cluster, ]
  # Number of rows selected for this cluster.
  tamanho <- nrow(dadosc)
  leaflet(dadosc) %>%
    addTiles(group = "OSM") %>%
    addCircles(~V1, ~V2, weight = 0.1, radius = 8, color = 'blue',
               stroke = TRUE, fillOpacity = 0.8) %>%
    # The title uses the `cluster` argument. Reading the global counter `a`
    # here labelled every map with whatever value `a` last had.
    addLegend("topright", colors = "blue",
              labels = paste("com", tamanho, "alvaras", sep = " "),
              title = paste("Cluster nº", cluster, sep = " "))
}
# Counter used by the first call below.
a = 0
# `(a=a+1)` increments `a` and passes the new value (1) as the cluster id.
meucluster((a=a+1))
# Maps for a hand-picked set of cluster ids; each call is a separate top-level
# statement so its widget is printed.
meucluster(6)
meucluster(10)
meucluster(12)
meucluster(13)
meucluster(14)
meucluster(17)
meucluster(19)
meucluster(34)
meucluster(36)
meucluster(45)
meucluster(47)
meucluster(68)
meucluster(70)
head(dados)
# Manual inspection of cluster 2: list its rows and count them.
cluster = c(2)
dadosc = dados[dados$cluster == cluster,]
dadosc
tamanho = length(dados$cluster[dados$cluster==cluster])
tamanho
## [1] 1123
Para facilitar a navegação ao arrastar ou dar zoom no mapa, recomenda-se desligar a camada (layer) de clusters e religá-la após posicionar o mapa como desejado.
# Color mapping from the values of dados$cluster to colors of the "Dark2" palette.
pal <- colorFactor(domain = dados$cluster, palette = "Dark2")

# Overview map of all points, colored by cluster. Tiles, circles and legend
# are registered as separate groups so each can be toggled from the layers
# control.
leaflet(data = dados) %>%
  addTiles(group = "Mapa") %>%
  addCircles(
    lng = ~V1,
    lat = ~V2,
    group = "Clusters",
    radius = 30,
    weight = 0.1,
    stroke = TRUE,
    color = ~pal(cluster),
    fillOpacity = 0.8,
    # Clicking a circle shows the cluster it belongs to.
    popup = ~paste("Cluster: ", cluster, sep = " ")
  ) %>%
  addLegend(
    position = "topright",
    group = "Legenda",
    colors = "",
    # First element of summary(apres) is used as the cluster count.
    labels = paste(summary(apres)[1], "Clusters"),
    title = "Alvaras em Porto Alegre"
  ) %>%
  addLayersControl(
    options = layersControlOptions(collapsed = TRUE),
    overlayGroups = c("Mapa", "Clusters", "Legenda")
  )
## Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors