The data consist of Google Trends results for AutoML (automated machine learning). They were extracted from the 2020 Kaggle online survey: https://www.kaggle.com/c/kaggle-survey-2020
URL: https://www.kaggle.com/parulpandey/automl-google-trends-data?select=relatedEntities.csv
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the four Google Trends CSV exports.
# The folder is defined once so the script can be pointed at another location
# by editing a single value; file.path() joins with "/" and therefore yields
# exactly the same paths as before.
data_dir <- "C:/Users/Anhuynh/Desktop/Data Science Project/AI & Technology Trend"

sea <- read.csv(file.path(data_dir, "Searches.csv"))        # Date, Search.Interest
geo <- read.csv(file.path(data_dir, "geoMap.csv"))          # Country, Number.of.Searches
ent <- read.csv(file.path(data_dir, "relatedEntities.csv")) # Related.Entities, Search.Interest
que <- read.csv(file.path(data_dir, "relatedQueries.csv"))  # Related.Queries, Search.Interest
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Convert the Date column with lubridate::ym() (year-month parser) so it can
# be used on a date axis later on.
sea[["Date"]] <- ym(sea[["Date"]])
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.2 v stringr 1.4.0
## v tidyr 1.1.3 v forcats 0.5.1
## v readr 1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x lubridate::setdiff() masks base::setdiff()
## x lubridate::union() masks base::union()
library(ggmap)
## Warning: package 'ggmap' was built under R version 4.1.1
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
# Build the list of unique countries and geocode each one.
# mutate_geocode() queries the Google Geocoding API once per row and appends
# `lon` / `lat` columns to the data frame it is given.
country <- geo %>% distinct(Country)
country_df <- as.data.frame(country)
locations_df <- country_df %>% mutate_geocode(Country)
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=China&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=South+Korea&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Hong+Kong&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Singapore&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Taiwan&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Israel&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Ireland&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Japan&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Switzerland&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Belgium&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=United+States&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Netherlands&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Canada&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=India&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Germany&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Norway&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=United+Kingdom&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Denmark&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=France&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Malaysia&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Sweden&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Australia&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Philippines&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Portugal&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=South+Africa&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Spain&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Russia&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Italy&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Pakistan&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Vietnam&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Mexico&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Thailand&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Poland&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Brazil&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Chile&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Argentina&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Ukraine&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Indonesia&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Colombia&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Turkey&key=xxx
# Keep the geocoded countries as a tibble, then attach their coordinates to
# every row of the search-volume table (rows of `geo` are all retained).
locations <- as_tibble(locations_df)
geo_per_destination <- left_join(geo, locations, by = "Country")
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.1.1
# World map of AutoML search volume: one bubble per country, sized by
# Number.of.Searches, with the country name as a repelled text label.
world <- map_data("world")

ggplot() +
  # Base layer: country polygons. geom_map() only understands `map_id`, so
  # x/y are not mapped here (doing so triggers an "unknown aesthetics"
  # warning); the axis limits are supplied by expand_limits() instead.
  geom_map(
    data = world, map = world,
    aes(map_id = region),
    color = "white", fill = "lightgrey", size = 0.1
  ) +
  expand_limits(x = world$long, y = world$lat) +
  # Bubbles: the colour is a fixed value, so it is set as a parameter rather
  # than routed through aes() and a manual colour scale.
  geom_point(
    data = geo_per_destination,
    aes(x = lon, y = lat, size = Number.of.Searches),
    color = "blue", alpha = 0.5
  ) +
  # Labels: `label.size` is not a geom_text_repel() aesthetic and was dropped.
  # max.overlaps = Inf keeps every country labelled; with the default, ggrepel
  # silently omitted 10 of them. Lower it again if the map gets too crowded.
  geom_text_repel(
    data = locations,
    aes(x = lon, y = lat, label = Country),
    max.overlaps = Inf
  ) +
  scale_size_continuous(range = c(2, 8)) +
  labs(
    title = "Google Searches of Automated Machine Learning across Countries in 2020",
    subtitle = paste(
      "Values in correspondence of location & popularity", "|",
      "0 : not enough data; 50 : half as popular; 100 : most popular"
    )
  ) +
  theme_void() +
  # The legend is hidden, so the former legend titles and guides() override
  # had no effect and were removed; the subtitle explains the scale instead.
  theme(
    legend.position = "none",
    plot.subtitle = element_text(size = 9, hjust = 0.25, face = "italic", color = "blue")
  )
# Search interest over time: raw monthly series plus a LOESS trend with its
# confidence band.
ggplot(sea, aes(x = Date, y = Search.Interest)) +
  geom_line(color = "#00AFBB", size = 1) +
  # The formula is stated explicitly (it is the default) so that ggplot2 does
  # not print the "using formula 'y ~ x'" message.
  stat_smooth(
    method = "loess", formula = y ~ x,
    color = "#FC4E07", fill = "#FC4E07"
  ) +
  scale_x_date(date_labels = "%b-%Y") +
  labs(
    x = "Date", y = "",
    title = "Google Search for Automated ML over Years",
    subtitle = "Timeline Data (2015-2020)"
  ) +
  # theme_dark() is a complete theme and resets anything set before it, so it
  # comes first and the individual tweaks follow. The manual colour/fill
  # scales were removed: no aesthetic is mapped to colour or fill, so they
  # never applied.
  theme_dark() +
  theme(
    legend.position = "none",
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )
library(ggplot2)
# Horizontal bar chart of related search entities, ordered by search interest,
# with each bar's value printed at its centre.
ggplot(
  ent,
  aes(
    x = Search.Interest,
    y = reorder(Related.Entities, Search.Interest),
    label = Search.Interest
  )
) +
  # geom_col() always plots the values as given; it has no `stat` parameter,
  # and passing one only produced an "unknown parameters" warning.
  geom_col(fill = "blue", alpha = 0.9, width = 0.8) +
  geom_text(position = position_stack(vjust = 0.5), size = 3, color = "white") +
  labs(
    x = "Scoring of Search Interest", y = "",
    title = "Category of Search Entities",
    subtitle = "Scoring is on a relative scale."
  ) +
  # Complete theme first, tweaks afterwards: theme_dark() would otherwise
  # overwrite any theme() settings placed before it.
  theme_dark() +
  theme(
    legend.position = "none",
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(color = "darkgrey")
  )
# Horizontal bar chart of related search queries, ordered by search interest,
# with each bar's value printed at its centre.
ggplot(
  que,
  aes(
    x = Search.Interest,
    y = reorder(Related.Queries, Search.Interest),
    label = Search.Interest
  )
) +
  # geom_col() always plots the values as given; it has no `stat` parameter,
  # and passing one only produced an "unknown parameters" warning.
  geom_col(fill = "purple", alpha = 0.9, width = 0.8) +
  geom_text(position = position_stack(vjust = 0.5), size = 3, color = "white") +
  labs(
    x = "Scoring of Search Interest", y = "",
    title = "Category of Search Queries",
    subtitle = "Scoring is on a relative scale."
  ) +
  # Complete theme first, tweaks afterwards: theme_dark() would otherwise
  # overwrite any theme() settings placed before it.
  theme_dark() +
  theme(
    legend.position = "none",
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(color = "darkgrey")
  )