library(mapdeck)
## Warning: package 'mapdeck' was built under R version 3.6.2
library(factoextra)
## Loading required package: ggplot2
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following objects are masked from 'package:mapdeck':
## 
##     add_heatmap, add_mesh, add_sf, add_text
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(stringr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
rm(list = ls())

# The goal of this analysis is to estimate the probability of being the victim of a crime in each suburb of Buenos Aires city at a specific hour of the day (called "time zone" below).
# The data is available from the Buenos Aires city government website.

# Read one CSV export per year. The 2021 export is read with an explicit
# header flag and ";" as field separator.
crimes_2016 <- read.csv("/Users/user/Desktop/Data Scientis R Python SQL/Datasets/Delitos/delitos_2016.csv")
crimes_2017 <- read.csv("/Users/user/Desktop/Data Scientis R Python SQL/Datasets/Delitos/delitos_2017.csv")
crimes_2018 <- read.csv("/Users/user/Desktop/Data Scientis R Python SQL/Datasets/Delitos/delitos_2018.csv")
crimes_2019 <- read.csv("/Users/user/Desktop/Data Scientis R Python SQL/Datasets/Delitos/delitos_2019.csv")
crimes_2020 <- read.csv("/Users/user/Desktop/Data Scientis R Python SQL/Datasets/Delitos/delitos_2020.csv")
crimes_2021 <- read.csv(
  "/Users/user/Desktop/Data Scientis R Python SQL/Datasets/Delitos/delitos_2021.csv",
  header = TRUE,
  sep = ";"
)

# Columns that are unnecessary for the analysis are dropped by position.
# The 2016-2019 files share one layout; 2020 and 2021 share another.
drop_2016_2019 <- -c(1, 2, 5, 7)
drop_2020_2021 <- -c(1, 2, 3, 4, 5, 8, 9, 11)

# Common column set (and order) used to stack all the years together.
kept_columns <- c(
  "franja_horaria", "tipo_delito", "barrio",
  "lat", "long", "cantidad_registrada"
)

# 2016-2019 already carry these names, so they are only reordered.
crimes_2016_df <- crimes_2016[, drop_2016_2019][, kept_columns]
crimes_2017_df <- crimes_2017[, drop_2016_2019][, kept_columns]
crimes_2018_df <- crimes_2018[, drop_2016_2019][, kept_columns]
crimes_2019_df <- crimes_2019[, drop_2016_2019][, kept_columns]

# 2020-2021: the remaining columns are renamed to the common names.
crimes_2020_df <- crimes_2020[, drop_2020_2021]
crimes_2021_df <- crimes_2021[, drop_2020_2021]
colnames(crimes_2020_df) <- kept_columns
colnames(crimes_2021_df) <- kept_columns

# The yearly datasets are merged into one.
crimes <- rbind(
  crimes_2016_df, crimes_2017_df, crimes_2018_df,
  crimes_2019_df, crimes_2020_df, crimes_2021_df
)

# Column names are translated to English.
names(crimes) <- c("time_zone", "crime_type", "suburb", "lat", "long", "number")

# The merged dataset has 6 columns; the run recorded below had 648656 records.
summary(crimes)
##    time_zone                      crime_type             suburb      
##  20     : 41735   Homicidio            :  1424   Palermo    : 51904  
##  19     : 40951   Hurto (sin violencia):242060   Balvanera  : 44464  
##  18     : 39915   Lesiones             : 52414   Flores     : 33398  
##  21     : 36949   Robo (con violencia) :352758   Recoleta   : 30185  
##  17     : 35347                                  Caballito  : 29799  
##  12     : 33954                                  San Nicolás: 27739  
##  (Other):419805                                  (Other)    :431167  
##      lat                long               number      
##  Length:648656      Length:648656      Min.   : 1.00   
##  Class :character   Class :character   1st Qu.: 1.00   
##  Mode  :character   Mode  :character   Median : 1.00   
##                                        Mean   : 1.01   
##                                        3rd Qu.: 1.00   
##                                        Max.   :16.00   
##                                        NA's   :259972
##The columns are:

#time_zone: Hour of the day in which the crime was committed (0 to 23 hs)
#crime_type: Crime type
#suburb: Suburb of Buenos Aires city
#lat: Latitude 
#long: Longitude
#number: Number of victims affected by the crime

# Preprocessing data

# Latitude and longitude are converted to numeric. Entries that are not valid
# numbers become NA (R reports "NAs introduced by coercion").
crimes$lat <- as.numeric(crimes$lat)
crimes$long <- as.numeric(crimes$long)

# Suburb: empty strings become NA and "La Boca" is normalised to "Boca".
# gsub() with an NA replacement turns every matching element into NA.
suburb_clean <- gsub("^$", NA, crimes$suburb)
suburb_clean <- gsub("La Boca", "Boca", suburb_clean)
crimes$suburb <- as.factor(suburb_clean)

# Time zone: the "no data" markers found in the files are replaced by NA.
time_zone_clean <- crimes$time_zone
for (missing_marker in c("S/D", "sd", "SD")) {
  time_zone_clean <- gsub(missing_marker, NA, time_zone_clean)
}
crimes$time_zone <- as.factor(time_zone_clean)

# Each crime is assumed to have at least one victim, so a missing "number"
# is imputed as 1.
crimes$number[is.na(crimes$number)] <- 1

# Atypical coordinates are removed. Where the condition evaluates to NA
# (a missing coordinate), logical indexing yields a row made entirely of NA.
inside_area <- crimes$lat > -35 & crimes$long > -59
crimes <- crimes[inside_area, ]

# Rows in which every column is NA are removed.
all_missing <- rowSums(is.na(crimes)) == ncol(crimes)
crimes <- crimes[!all_missing, ]

# Records without a time zone are removed.
has_time_zone <- !is.na(crimes$time_zone)
crimes <- crimes[has_time_zone, ]

#Functions

# proportion(): share of each row's value within its group.
#
# For every row of `df`, the value in `numeric_column` is divided by the sum
# of `numeric_column` over all rows that have the same `categoric_column`
# value. The result keeps the row order of `df`, so it can be assigned
# straight back to `df` as a new column (returning the values grouped by
# category instead would misalign them unless `df` were sorted by category).
#
# Arguments:
#   df               data frame
#   categoric_column name of the grouping column
#   numeric_column   name of the numeric column
#   column_name      name given to the single column of the result
# Returns a one-column data frame with nrow(df) rows. Rows whose group is NA
# get NA.
proportion <- function(df, categoric_column, numeric_column, column_name) {
  groups <- factor(df[[categoric_column]])
  values <- df[[numeric_column]]

  # One total per group, in the order of the factor levels.
  group_totals <- vapply(split(values, groups), sum, numeric(1))

  # Map each row to its group's total through the integer factor codes.
  row_totals <- unname(group_totals)[as.integer(groups)]

  prop <- data.frame(values / row_totals)
  colnames(prop) <- column_name
  prop
}


# sample_function(): random sample of row indices.
#
# Draws round(nrow(dataframe) * proportion) distinct indices, without
# replacement, from all rows 1..nrow(dataframe). Sampling from 1..n instead
# would only ever shuffle the first n rows.
#
# Arguments:
#   dataframe  data frame whose rows are sampled
#   proportion fraction of rows to draw (between 0 and 1)
# Returns an integer vector of row indices.
sample_function <- function(dataframe, proportion) {
  total_rows <- nrow(dataframe)
  n <- round(total_rows * proportion, 0)
  sample.int(total_rows, n, replace = FALSE)
}

# Making of the final dataset:

# It is assumed that some areas of a suburb are more dangerous than others,
# so each suburb is split into three zones around its mean coordinates
# (for example "Palermo  Zone  1", "Palermo  Zone  2", "Palermo  Zone  3"):
#   Zone 1: longitude and latitude both above the suburb mean
#   Zone 2: longitude below and latitude above the suburb mean
#   Zone 3: latitude below the suburb mean
# The comparisons are strict, so a record lying exactly on a dividing mean
# is not assigned to any zone.
crimes_zone <- data.frame()
zone_pieces <- list()

for (i in unique(crimes$suburb)) {
  # The suburb subset and its centred coordinates do not depend on the zone,
  # so they are computed once per suburb.
  data <- subset(crimes, suburb == i)
  if (nrow(data) == 0) {
    next
  }

  filt_long <- as.vector(
    scale(data[, "long", drop = FALSE], center = TRUE, scale = FALSE)
  )
  filt_lat <- as.vector(
    scale(data[, "lat", drop = FALSE], center = TRUE, scale = FALSE)
  )

  zone_masks <- list(
    filt_long > 0 & filt_lat > 0,
    filt_long < 0 & filt_lat > 0,
    filt_lat < 0
  )

  for (j in 1:3) {
    data_filt <- data[zone_masks[[j]], ]

    # An empty zone cannot be combined with a length-one Zone label in
    # data.frame(), so it is skipped.
    if (nrow(data_filt) == 0) {
      next
    }

    # The label keeps the double spaces produced by paste(); the suffix is
    # stripped again with the same spacing when the suburb is recovered.
    zone_pieces[[length(zone_pieces) + 1]] <- data.frame(
      data_filt,
      Zone = paste(i, " Zone ", j)
    )
  }
}

# Bind all pieces in one call instead of growing the data frame in the loop.
if (length(zone_pieces) > 0) {
  crimes_zone <- do.call(rbind, zone_pieces)
}
  



# Group by zone and time zone: total number of victims and mean coordinates.
victims_by_group <- aggregate(number ~ Zone + time_zone, crimes_zone, FUN = sum)
mean_lat_by_group <- aggregate(lat ~ Zone + time_zone, crimes_zone, FUN = mean)
mean_long_by_group <- aggregate(long ~ Zone + time_zone, crimes_zone, FUN = mean)

crimes_group <- data.frame(
  victims_by_group,
  lat = mean_lat_by_group[, "lat"],
  long = mean_long_by_group[, "long"]
)

# The suburb is recovered by stripping the zone suffix from the zone label.
suburb_of_zone <- crimes_group$Zone
for (zone_suffix in c("  Zone  1", "  Zone  2", "  Zone  3")) {
  suburb_of_zone <- gsub(zone_suffix, "", suburb_of_zone)
}
crimes_group$suburb <- suburb_of_zone

# Proportion of victims relative to the suburb total.
# NOTE(review): this assignment is positional, so it relies on proportion()
# returning its values in the row order of crimes_group - verify.
crimes_group[, "prop_suburb"] <- proportion(
  crimes_group, "suburb", "number", "prop_suburb"
)

# Proportion of victims relative to the time zone total (same caveat).
crimes_group[, "prop_time_zone"] <- proportion(
  crimes_group, "time_zone", "number", "prop_time_zone"
)

# Number of victims expressed as a proportion of the overall total.
crimes_group$prop_number <- crimes_group$number / sum(crimes_group$number)

# The suburb column is no longer needed.
crimes_group[["suburb"]] <- NULL

# Final dataset:
head(crimes_group)
##                         Zone time_zone number       lat      long
## 1 Parque Avellaneda  Zone  1         0    158 -34.64349 -58.47105
## 2 Parque Avellaneda  Zone  2         0    145 -34.64352 -58.48404
## 3 Parque Avellaneda  Zone  3         0    275 -34.65383 -58.47556
## 4           Palermo  Zone  1         0    365 -34.57501 -58.41387
## 5           Palermo  Zone  2         0    465 -34.57356 -58.43393
## 6           Palermo  Zone  3         0   1395 -34.58866 -58.42350
##   prop_suburb prop_time_zone  prop_number
## 1  0.01764574    0.005606018 0.0002861127
## 2  0.01619388    0.005144763 0.0002625718
## 3  0.03071253    0.009757309 0.0004979809
## 4  0.00580746    0.012950610 0.0006609565
## 5  0.00692428    0.016498723 0.0008420405
## 6  0.01183828    0.049496168 0.0025261214

# Clustering: under the assumption that suburbs do not have a high crime rate
# around the clock, each zone/hour combination is assigned to one of three
# groups by k-means.

# The five features are centred and scaled once and reused below.
cluster_features <- c(
  "lat", "long", "prop_suburb", "prop_time_zone", "prop_number"
)
scaled_features <- scale(
  crimes_group[, cluster_features],
  center = TRUE,
  scale = TRUE
)

# k-means with 3 centers and 25 random starts; the seed makes it reproducible.
set.seed(1)
k3 <- kmeans(scaled_features, centers = 3, nstart = 25)

# The first element of the object returned by fviz_cluster() is indexed for
# its "cluster" column, which gives a one-column data frame of assignments.
cluster_plot <- fviz_cluster(k3, data = scaled_features)
clusters <- cluster_plot[[1]]["cluster"]

# The cluster column is added to the final dataset.
crimes_group_clust <- cbind(crimes_group, clusters)

# Per cluster: number of zone/hour combinations (Freq), total victims
# (number) and victims per combination (ratio). In the run recorded below,
# cluster 1 has 371 combinations and 207980 victims (ratio 561), cluster 2
# has a ratio of 147 and cluster 3 a ratio of 80.
cluster_sizes <- table(clusters)
cluster_victims <- aggregate(number ~ cluster, crimes_group_clust, FUN = sum)
clusters_info <- data.frame(cluster_sizes, cluster_victims[2])
clusters_info$ratio <- round(clusters_info$number / clusters_info$Freq)

clusters_info
##   clusters Freq number ratio
## 1        1  371 207980   561
## 2        2 1351 199204   147
## 3        3 1804 145046    80
# The next map shows the locations of the zone/hour combinations assigned to
# cluster 1, the cluster with the highest victims-per-zone ratio in the
# recorded run.
# Double-click on the map to zoom in.
# NOTE(review): the Mapbox access token is hard-coded here; consider reading
# it from an environment variable instead of keeping it in the script.
map <- mapdeck(
  token = "pk.eyJ1IjoidGFmdTUiLCJhIjoiY2s5Mm10aXNlMGE1NTNnbWsyc3hlcmJsMiJ9.t-XtysS1KobBYLBrig_VEQ",
  style = mapdeck_style("dark"),
  location = c(-58.48307, -34.54591),
  zoom = 100
)

# Filter on the `cluster` column of the data frame itself rather than on the
# separate global `clusters` object.
high_risk_points <- subset(
  crimes_group_clust,
  cluster == 1,
  select = c("lat", "long")
)

map <- add_pointcloud(
  map,
  data = high_risk_points,
  lon = "long",
  lat = "lat",
  layer_id = "clusters",
  focus_layer = TRUE,
  digits = 100,
  fill_colour = "red"
)
map

# Probability: Bayes' theorem is used to compute the probability of each zone
# given a specific time zone, P(zone | hour) = P(hour | zone) P(zone) / P(hour).

# bayes_prob has one row per zone (in reverse order of first appearance in
# crimes_group) and gets one column per hour.
zone_levels <- unique(crimes_group$Zone)
zone_labels <- as.character(zone_levels)
bayes_prob <- data.frame(Zone = as.character(rev(zone_levels)))

total_number <- sum(subset(crimes_group, select = "number"))

for (hour_label in as.character(0:23)) {
  # P(hour): share of all victims recorded at this hour.
  prob_h <- sum(subset(crimes_group,
                       time_zone == hour_label,
                       select = "number")) / total_number

  prob_zone_given_hour <- vapply(zone_labels, function(zone_label) {
    in_zone <- subset(crimes_group,
                      Zone == zone_label,
                      select = "number")
    in_zone_at_hour <- subset(crimes_group,
                              time_zone == hour_label & Zone == zone_label,
                              select = "number")

    # P(zone): share of all victims recorded in this zone.
    prob_e <- sum(in_zone) / total_number

    # P(hour | zone): zero when the zone has no record at this hour.
    prob_h_given_e <- if (nrow(in_zone_at_hour) == 0) {
      0
    } else {
      sum(in_zone_at_hour) / sum(in_zone)
    }

    prob_h_given_e * prob_e / prob_h
  }, numeric(1), USE.NAMES = FALSE)

  # Reverse so that the values line up with the reversed Zone column.
  hour_column <- data.frame(rev(prob_zone_given_hour))
  names(hour_column) <- gsub(" ", "", paste("prob_at_", hour_label, "_hs"))
  bayes_prob <- cbind(bayes_prob, hour_column)
}

head(bayes_prob)
##                        Zone prob_at_0_hs prob_at_1_hs prob_at_2_hs
## 1 Villa Del Parque  Zone  3 0.0003902924 0.0007842454 0.0010101010
## 2 Villa Del Parque  Zone  2 0.0001064434 0.0006099686 0.0007856341
## 3 Villa Del Parque  Zone  1 0.0001419245 0.0004356919 0.0006734007
## 4    Puerto Madero  Zone  3 0.0008515470 0.0008713838 0.0001122334
## 5    Puerto Madero  Zone  2 0.0004967357 0.0009585221 0.0006734007
## 6    Puerto Madero  Zone  1 0.0001419245 0.0002614151 0.0001122334
##   prob_at_3_hs prob_at_4_hs prob_at_5_hs prob_at_6_hs prob_at_7_hs
## 1 0.0003883495 0.0001296008 0.0005010020 0.0004850445 0.0012759171
## 2 0.0002588997 0.0003888025 0.0002004008 0.0005658852 0.0005741627
## 3 0.0007766990 0.0006480041 0.0008016032 0.0004850445 0.0003827751
## 4 0.0005177994 0.0005184033 0.0005010020 0.0013742926 0.0007017544
## 5 0.0006472492 0.0002592017 0.0003006012 0.0002425222 0.0005103668
## 6 0.0001294498 0.0005184033 0.0006012024 0.0001616815 0.0003189793
##   prob_at_8_hs prob_at_9_hs prob_at_10_hs prob_at_11_hs prob_at_12_hs
## 1 0.0011819749 0.0014663450  0.0010674181  0.0011401546  0.0011531204
## 2 0.0006894853 0.0006149189  0.0005977542  0.0004222795  0.0004892026
## 3 0.0004432406 0.0006149189  0.0005123607  0.0001266838  0.0004892026
## 4 0.0004924895 0.0008041247  0.0005123607  0.0006756471  0.0007338039
## 5 0.0006402364 0.0002838087  0.0003415738  0.0005067354  0.0004542596
## 6 0.0003447427 0.0001892058  0.0001707869  0.0001689118  0.0001397722
##   prob_at_13_hs prob_at_14_hs prob_at_15_hs prob_at_16_hs prob_at_17_hs
## 1  0.0006946476  0.0005189593  0.0005349882  0.0005924171  0.0007637897
## 2  0.0002559228  0.0005535566  0.0003923247  0.0004878729  0.0006973732
## 3  0.0002193624  0.0004843620  0.0002853271  0.0002787845  0.0001992495
## 4  0.0006215268  0.0011071132  0.0011769741  0.0010454419  0.0009962475
## 5  0.0004387248  0.0005189593  0.0003566588  0.0005227209  0.0003984990
## 6  0.0002924832  0.0002421810  0.0002853271  0.0003484806  0.0002656660
##   prob_at_18_hs prob_at_19_hs prob_at_20_hs prob_at_21_hs prob_at_22_hs
## 1  0.0005242159  0.0005103343  5.486667e-04  0.0008235976  0.0008367617
## 2  0.0002038617  0.0003118709  1.920334e-04  0.0002135253  0.0002091904
## 3  0.0002329848  0.0003118709  1.920334e-04  0.0001830217  0.0002091904
## 4  0.0011649241  0.0010490204  6.309668e-04  0.0005185614  0.0003835158
## 5  0.0004950928  0.0005103343  3.840667e-04  0.0003355398  0.0003486507
## 6  0.0002912310  0.0001134076  5.486667e-05  0.0001830217  0.0001045952
##   prob_at_23_hs
## 1  4.910714e-04
## 2  2.678571e-04
## 3  4.464286e-05
## 4  5.357143e-04
## 5  6.250000e-04
## 6  1.785714e-04

# Once the Bayes data frame is built, the probabilities of one zone across
# all time zones can be plotted. A random zone is taken for this example.

# Draw the zone from however many rows bayes_prob has instead of a fixed count.
index_zone <- sample(seq_len(nrow(bayes_prob)), 1)

# Transpose the selected row so that each hour becomes a row.
data_zone <- data.frame(t(bayes_prob[index_zone, -1])[, ])

zone_name <- gsub(" ", "_", bayes_prob[index_zone, 1])
colnames(data_zone) <- zone_name
data_zone[, "time_zone"] <- factor(0:23, labels = rownames(data_zone))

rownames(data_zone) <- NULL

plotly <- plot_ly(
  data = data_zone,
  x = ~time_zone,
  y = ~data_zone[, 1],
  type = "scatter",
  mode = "lines+markers",
  line = list(width = 4)
)

# The title is the zone label with repeated spaces collapsed, so multi-word
# suburb names are shown without underscores.
# NOTE(review): 'dark' is not a CSS colour name; plotly presumably falls back
# to its default tick colour - confirm.
plotly <- layout(
  plotly,
  title = gsub(" +", " ", bayes_prob[index_zone, 1]),
  xaxis = list(
    title = "Time Zone",
    showgrid = FALSE,
    tickfont = list(family = "Arial", size = 12, color = "dark")
  ),
  yaxis = list(
    title = "Probability",
    showgrid = FALSE,
    tickfont = list(family = "Arial", size = 12)
  )
)

plotly

# In the same way, it may be interesting to see the probabilities of every
# zone for one specific time zone. Within one time zone the probabilities
# sum to 1.

# Column 1 is Zone; every remaining column is one hour, so the draw covers
# columns 2..ncol(bayes_prob) and the last hour can be selected too.
index_time <- sample(2:ncol(bayes_prob), 1)
data_time <- bayes_prob[, c(1, index_time)]
data_time <- data_time[order(data_time$Zone), ]

plotly <- plot_ly(
  data = data_time,
  x = ~Zone,
  y = ~data_time[, 2],
  type = "scatter",
  mode = "lines+markers",
  line = list(width = 4)
)

# NOTE(review): 'dark' is not a CSS colour name; plotly presumably falls back
# to its default tick colour - confirm.
plotly <- layout(
  plotly,
  title = str_to_title(gsub("_", " ", colnames(data_time)[2])),
  xaxis = list(
    title = "Zone",
    showgrid = FALSE,
    tickfont = list(family = "Arial", size = 12, color = "dark")
  ),
  yaxis = list(
    title = "Probability",
    showgrid = FALSE,
    tickfont = list(family = "Arial", size = 12)
  )
)
plotly

# The 20 highest probabilities of crime are plotted together with their zone
# and time zone. First bayes_prob is reshaped to one row per zone and hour.

t <- nrow(bayes_prob)

# For every hour column: the zones ranked by decreasing probability.
ranked_by_hour <- lapply(2:ncol(bayes_prob), function(i) {
  ranked <- bayes_prob[order(bayes_prob[, i], decreasing = TRUE), c(1, i)]
  names(ranked) <- c("Zone", "Probability")
  ranked
})
m <- do.call(rbind, ranked_by_hour)

# Hour labels 0..23, each repeated once per zone, in the same block order.
m$time_zone <- as.character(rep(0:23, each = t))

# Attach the cluster of every zone/hour combination. Combinations without a
# row in crimes_group_clust are dropped by the merge.
data <- merge(
  unique(m),
  crimes_group_clust,
  by = c("Zone", "time_zone"),
  all.x = FALSE
)
data <- data[, c("Zone", "time_zone", "cluster", "Probability")]

data$zone_timezone <- as.factor(paste(data$Zone, ":", data$time_zone, "hs"))

probability_rank <- order(data$Probability, decreasing = TRUE)
data_top20 <- head(data[probability_rank, ], 20)
data_top20$Zone <- NULL

# Bar chart of the 20 zone/hour combinations with the highest probability,
# coloured by cluster. The `labels` and `mode` attributes are not valid for
# bar traces (plotly warns about them), so they are not passed.
plotly_time <- plot_ly(
  data_top20,
  x = ~as.character(zone_timezone),
  y = ~Probability,
  color = ~cluster,
  name = ~"cluster",
  text = ~paste("Cluster ", cluster),
  textposition = "outside",
  outsidetextfont = list(color = "#000000"),
  type = "bar",
  marker = list(size = ~zone_timezone, opacity = 0.5)
)

plotly_time <- plotly_time %>%
  layout(
    title = "Zones With The Highest Probability Of Crime",
    xaxis = list(title = "Zones", showgrid = FALSE, showticklabels = FALSE),
    yaxis = list(title = "Probability of crime", showgrid = FALSE)
  )

plotly_time
## Warning: 'bar' objects don't have these attributes: 'labels', 'mode'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'texttemplate', 'hovertext', 'hovertemplate', 'textposition', 'insidetextanchor', 'textangle', 'textfont', 'insidetextfont', 'outsidetextfont', 'constraintext', 'cliponaxis', 'orientation', 'base', 'offset', 'width', 'marker', 'offsetgroup', 'alignmentgroup', 'selected', 'unselected', 'r', 't', '_deprecated', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'basesrc', 'offsetsrc', 'widthsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

# In the same way, the zone/hour combinations with the lowest probability of
# crime are shown as a bar chart, coloured by cluster.

data_low20 <- head(data[order(data$Probability, decreasing = FALSE), ], 20)
data_low20$Zone <- NULL

# The `labels` and `mode` attributes are not valid for bar traces (plotly
# warns about them), so they are not passed.
plotly_time <- plot_ly(
  data_low20,
  x = ~as.character(zone_timezone),
  y = ~Probability,
  color = ~cluster,
  name = ~paste("Cluster ", cluster),
  textposition = "outside",
  outsidetextfont = list(color = "#000000"),
  type = "bar",
  marker = list(size = ~zone_timezone, opacity = 0.5)
)

plotly_time <- plotly_time %>%
  layout(
    title = "Zones With The Lowest Probability Of Crime",
    xaxis = list(title = "Zones", showgrid = FALSE, showticklabels = FALSE),
    yaxis = list(title = "Probability of crime", showgrid = FALSE)
  )

plotly_time
## Warning: 'bar' objects don't have these attributes: 'labels', 'mode'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'texttemplate', 'hovertext', 'hovertemplate', 'textposition', 'insidetextanchor', 'textangle', 'textfont', 'insidetextfont', 'outsidetextfont', 'constraintext', 'cliponaxis', 'orientation', 'base', 'offset', 'width', 'marker', 'offsetgroup', 'alignmentgroup', 'selected', 'unselected', 'r', 't', '_deprecated', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'basesrc', 'offsetsrc', 'widthsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'bar' objects don't have these attributes: 'labels', 'mode'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'texttemplate', 'hovertext', 'hovertemplate', 'textposition', 'insidetextanchor', 'textangle', 'textfont', 'insidetextfont', 'outsidetextfont', 'constraintext', 'cliponaxis', 'orientation', 'base', 'offset', 'width', 'marker', 'offsetgroup', 'alignmentgroup', 'selected', 'unselected', 'r', 't', '_deprecated', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'basesrc', 'offsetsrc', 'widthsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'