library(gtrendsR)
## Warning: package 'gtrendsR' was built under R version 4.1.3
## Warning in register(): Can't find generic `scale_type` in package ggplot2 to
## register S3 method.
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.1.3
library(ggplot2)
library(maps)
## Warning: package 'maps' was built under R version 4.1.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(wbstats)
## Warning: package 'wbstats' was built under R version 4.1.3
library(sf)
## Warning: package 'sf' was built under R version 4.1.3
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(viridis)
## Warning: package 'viridis' was built under R version 4.1.3
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
## The following object is masked from 'package:maps':
##
## unemp
library(rnaturalearth)
## Warning: package 'rnaturalearth' was built under R version 4.1.3
library(rnaturalearthdata)
## Warning: package 'rnaturalearthdata' was built under R version 4.1.3
In this section, I use the per-capita CO2 emissions of countries around the world in 2018 (pre-COVID-19) from the World Bank. First, I extract the per-capita emissions data for all countries from the World Bank API, and then I present them graphically on a world map.
# Country polygons (1:50m scale) as an sf object; this is the base map.
worldmap_data <- rnaturalearth::ne_countries(scale = 50, returnclass = "sf")

# Per-capita CO2 emissions for every country from the World Bank API.
# The year is pinned to 2018 so the data always matches the plot title;
# `mrv = 1` would instead return whichever year happens to be the most
# recent one at the time the script is run.
emi_data <- wb_data(
  indicator = "EN.ATM.CO2E.PC",
  country = "countries_only",
  start_date = 2018,
  end_date = 2018
)

# Attach the emissions to the polygons through the two-letter ISO code.
# NOTE(review): some Natural Earth releases carry "-99" in `iso_a2` for a few
# countries (e.g. France, Norway), which would leave them unmatched and drawn
# with an NA fill -- confirm against the installed rnaturalearth data.
emi_world <- left_join(worldmap_data, emi_data, by = c("iso_a2" = "iso2c"))
North American countries have the highest per-capita CO2 emissions, followed by Australia and the Middle Eastern countries. Moreover, African and South American countries have comparatively low per-capita CO2 emissions.
# World choropleth: one polygon per country, filled by its per-capita CO2
# emissions on the viridis colour scale.
ggplot() +
  geom_sf(data = emi_world, mapping = aes(fill = EN.ATM.CO2E.PC)) +
  scale_fill_viridis(name = "value") +
  labs(title = " CO2 emissions (metric tons per capita for 2018)") +
  theme_bw()
I have also looked at the trajectory of some of the highest CO2-emitting countries (US EPA, 2018). The red vertical line marks the date of COP21 (the Paris Agreement). This threshold is used to see whether those countries were able to lower their emission levels after COP21. From the following graph, we can see that China is the largest CO2 emitter in the world, with a growing trend over the years. The United States is the second-largest emitter, with a somewhat constant trend of emissions. Apart from India, all the other countries exhibit a constant level of carbon emissions over the years.
# Total CO2 emissions (kt) for seven large emitters, 1998-2018.
# `wb()` was deprecated in wbstats 1.0.0, so `wb_data()` is used instead.
# Naming the indicator keeps the result column called `value`, which is what
# the rest of this block reads. Country codes are all ISO3 for consistency.
emi7C <- wb_data(
  indicator = c(value = "EN.ATM.CO2E.KT"),
  country = c("USA", "CAN", "IND", "JPN", "DEU", "CHN", "RUS"),
  start_date = 1998,
  end_date = 2018
)

# Divide by 1000 to go from kilotonnes to megatonnes for the y axis.
emi7C1 <- mutate(emi7C, emission = value / 1000)

# theme_classic() is a complete theme: adding it replaces every theme element
# set so far. It therefore has to come BEFORE the theme() tweaks, otherwise
# the title and axis-title sizes are silently discarded.
ggplot(emi7C1, aes(x = as.numeric(date), y = emission, color = country)) +
  geom_line() +
  # Red reference line at 2015, the year of COP21 (Paris Agreement).
  geom_vline(xintercept = 2015, color = "red", size = 0.5) +
  ggtitle("Trajectory of CO2 Emission from 1998 to 2018") +
  ylab("CO2 emissions (Mega t)") +
  xlab("Year") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 11, face = "bold"),
    axis.title = element_text(size = 8)
  )
The World Bank has CO2 emissions data only up to 2018. It would have been interesting to see how the emission levels changed due to the pandemic (especially due to reduced air traffic) in both of the above graphs.
Being a huge sitcom fan, I wanted to see the global search trends for some of the most popular sitcoms within the last decade.
# Google web-search interest for five sitcoms between 2012-01-01 and
# 2022-01-01. Only the interest-over-time table of the gtrends result is
# kept, selected by name rather than by position.
sitcoms <- gtrends(
  c(
    "How I Met Your Mother", "Friends", "The Office",
    "The Big Bang Theory", "Parks and Recreation"
  ),
  gprop = "web",
  time = "2012-01-01 2022-01-01"
)$interest_over_time

# gtrendsR can return `hits` as character when a value is reported as "<1".
# Recode those entries as 0.5 and make the column numeric so that it plots
# on a continuous axis; an already-numeric column is left unchanged in value.
sitcoms$hits <- as.numeric(sub("^<1$", "0.5", as.character(sitcoms$hits)))
From the following graph, we can see that the sitcom Friends (IMDB rating 9.0/10) has the highest Google search hits, especially during the pandemic. The Office is the second most popular sitcom among the five based on Google search hits. Searches for “How I Met Your Mother” were high during its last three seasons.
# One line per sitcom showing its search interest over time, with a fixed
# colour assigned to each keyword.
ggplot(sitcoms, aes(x = date, y = hits, colour = keyword)) +
  geom_line() +
  scale_colour_manual(
    values = c(
      "How I Met Your Mother" = "red",
      "Friends" = "blue",
      "The Office" = "green",
      "The Big Bang Theory" = "yellow",
      "Parks and Recreation" = "black"
    )
  ) +
  labs(
    title = "Searches for Popular Sitcoms 2012-2022",
    x = "Time",
    y = "Number of Google Searches"
  ) +
  theme(plot.title = element_text(size = 11, face = "bold"))
Last year, during the fall semester, when Netflix released the drama “Squid Game”, it became a social media sensation. I wanted to see how popular the TV series is across the US.
# Google web-search interest for "Squid Game" in the US over the trailing
# twelve months (the window is relative to the day the script is run).
Sgame <- gtrends(
  keyword = "Squid Game",
  geo = "US",
  time = "today 12-m",
  gprop = "web"
)

# Per-region (state-level) interest table from the query result.
SgameUS <- Sgame[["interest_by_region"]]

# Polygon outlines of the US states, used as the base map.
USmap <- map_data(map = "state")
head(USmap)
## long lat group order region subregion
## 1 -87.46201 30.38968 1 1 alabama <NA>
## 2 -87.48493 30.37249 1 2 alabama <NA>
## 3 -87.52503 30.37249 1 3 alabama <NA>
## 4 -87.53076 30.33239 1 4 alabama <NA>
## 5 -87.57087 30.32665 1 5 alabama <NA>
## 6 -87.58806 30.32665 1 6 alabama <NA>
# Lower-case the state names so they match the `region` column of the US map
# data. tolower() is already vectorised, so no sapply() wrapper is needed
# (sapply() would also attach the original strings as element names).
SgameUS$region <- tolower(SgameUS$location)
head(SgameUS)
## location hits keyword geo gprop region
## 1 California 100 Squid Game US web california
## 2 Nevada 84 Squid Game US web nevada
## 3 Washington 81 Squid Game US web washington
## 4 New Jersey 80 Squid Game US web new jersey
## 5 Texas 78 Squid Game US web texas
## 6 Hawaii 78 Squid Game US web hawaii
# Keep every map vertex and attach the matching state's search hits to it.
comgamedta <- left_join(x = USmap, y = SgameUS, by = "region")
From the following graph, we can see that Squid Game has the highest search hits in California, followed by Nevada, Washington, New Jersey, and Texas.
# State-level choropleth of "Squid Game" search interest: each state polygon
# is outlined in black and filled on a blue (low) to green (high) gradient.
# The former `cex.main` and `line` arguments were removed: they are base
# graphics parameters and do not style the title when passed to labs().
ggplot() +
  geom_polygon(
    data = comgamedta,
    aes(x = long, y = lat, group = group, fill = hits),
    colour = "black"
  ) +
  scale_fill_continuous(low = "blue", high = "green") +
  labs(title = "Google search hits for Squid Game in USA")