options(width=100)
knitr::opts_chunk$set(out.width='1000px',dpi=200,message=FALSE,warning=FALSE)
#load packages and csv file
library(ggplot2)
library(dplyr)
library(gridExtra)
library(grid)
library(RColorBrewer)
library(ggrepel)
library(ggthemes)
library(viridis)
library(ggjoy)
df<-read.csv('ufo_sighting_data.csv',sep=',',stringsAsFactors=F)
df$latitude<-as.numeric(df$latitude)
df$year<-sapply(df$Date_time, function(x) as.numeric(strsplit(strsplit(x," ")[[1]][1],"/")[[1]][3]))
df$month<-sapply(df$Date_time, function(x) as.numeric(strsplit(strsplit(x," ")[[1]][1],"/")[[1]][1]))
df$day<-sapply(df$Date_time, function(x) as.numeric(strsplit(strsplit(x," ")[[1]][1],"/")[[1]][2]))
df$hour<-sapply(df$Date_time, function(x) as.numeric(strsplit(strsplit(x," ")[[1]][2],":")[[1]][1]))
df$min<-sapply(df$Date_time, function(x) as.numeric(strsplit(strsplit(x," ")[[1]][2],":")[[1]][2]))
df$month_name<-sapply(df$month, function(x) month.name[x])
df$month_name_ordered<-factor(df$month_name, levels =c(month.name))
df$DATE<-sapply(df$Date_time, function(x) strsplit(x,' ')[[1]][1])
df$DATE_2<-as.Date(df$DATE,"%m/%d/%Y")
df$weekday <- factor(weekdays(df$DATE_2, T), levels = rev(c("Mon", "Tue", "Wed", "Thu","Fri", "Sat", "Sun")))
df$length_min<-as.numeric(df$length_of_encounter_seconds)/60
blank_theme <- theme_fivethirtyeight()+
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.border = element_blank(),
panel.grid=element_blank(),
axis.ticks = element_blank(),
plot.title=element_text(size=14, face="bold")
)
g1<-ggplotGrob(
df %>%
group_by(UFO_shape) %>%
summarize(count=n()) %>%
ggplot(aes(x="",y=count,fill=UFO_shape)) +
geom_bar(width = 1, stat = "identity",color='white',size=.25) + coord_polar("y") +
scale_fill_manual(name="",values=viridis::inferno(length(unique(df$UFO_shape)))) +
blank_theme +
theme(axis.text.x=element_blank(),
legend.text=element_text(size=8),
legend.key.size = unit(.3, "cm")))
g2<-df %>%
group_by(year,country) %>%
summarize(count=n()) %>%
ggplot(aes(x=year,y=count,fill=country)) +
geom_histogram(stat='identity',width=1,color='white',size=.25) +
scale_fill_brewer(palette='Paired') + geom_vline(xintercept=1993,color='black') +
annotate("text", x = 1985, y = 6000, label = "X-Files\nTV-show starts",color='black') +
theme_fivethirtyeight() +
ggtitle('UFO sightings overview\nby year, shape, location') +
theme(legend.position='right',legend.direction='vertical')
g2 + annotation_custom(grob = g1, xmin = 1900, xmax = 1950, ymin = 1500, ymax = 6500)
90's
X-Files
TV show for referencelight
, then triangle
, circle
and fireball
countries_map <-map_data("world")
world_map<-ggplot() +
geom_map(data = countries_map,
map = countries_map,aes(x = long, y = lat, map_id = region, group = group),
fill = "white", color = "black", size = 0.1)
world_map + geom_point(data=df,aes(x=longitude,y=latitude),alpha=.5,size=.25) + theme_fivethirtyeight() + ggtitle('Location of UFO sightings')
ET
likes the US …states_map<-map_data("state")
usMap<-ggplot() +
geom_map(data = states_map, map = states_map,aes(x = long, y = lat, map_id = region, group = group),fill = "white", color = "black", size = 0.1) +
theme_fivethirtyeight()
usMap +
geom_point(data=filter(df,country=='us' & state.province!='hi' & state.province!='ak' & state.province!='pr'),aes(x=longitude,y=latitude),alpha=.75,size=.5) + ggtitle('Location of UFO sightings in the US')
ET
prefers the east coast and the big cities …df %>% group_by(hour,month_name_ordered) %>% summarize(count=n()) %>% ggplot(aes(x=factor(hour),y=count,color=month_name_ordered,group=month_name_ordered)) + geom_line() + theme_fivethirtyeight() + scale_color_manual(name="",values=brewer.pal(12,'Paired')) + geom_point(color='black',size=.5,alpha=1) + ggtitle('# of UFO observations during the day')
df %>% group_by(weekday,month_name_ordered) %>% summarize(count=n()) %>% ggplot(aes(x=weekday,y=count,color=month_name_ordered,group=month_name_ordered)) + geom_line() + theme_fivethirtyeight() + scale_color_brewer(name="",palette='Paired') + geom_point(color='black',size=.5,alpha=1) + ggtitle('# of UFO observations during the week')
ET
has a busy week at work and prefers to go outside during the weekend.Let’s take a look at the duration of the encounter.
First a look at the distribution of the duration (in minutes) reveals that there are some very large outliers. For example :
df %>% filter(length_min>1e6) %>% select(-c(length_of_encounter_seconds,DATE_2,description,year, month, day ,hour, min, month_name, month_name_ordered,state.province))
## Date_time city country UFO_shape described_duration_of_encounter
## 1 10/1/1983 17:00 birmingham (uk/england) gb sphere 31 years
## 2 6/3/2010 23:30 ottawa (canada) ca other 23000hrs
## 3 9/15/1991 18:00 greenbrier us light 21 years
## date_documented latitude longitude DATE weekday length_min
## 1 4/12/2013 52.46667 -1.916667 10/1/1983 Sat 1630600
## 2 7/6/2010 45.41667 -75.700000 6/3/2010 Thu 1380000
## 3 3/31/2008 35.23389 -92.387500 9/15/1991 Sun 1104600
97836000
makes 3.1 years, however the difference between the date_documented
and the Date_time
is 30 yearsdf %>%
select(length_min, country) %>%
ggplot(aes(x=log(length_min), y= country)) +
geom_joy(scale = 1.5) + theme_joy() +
ggtitle("log(encounter's duration) [min] vs. country") + xlab('minutes(log scale)') + ylab('Country')
UFO shape
df %>%
select(UFO_shape,length_min) %>%
group_by(UFO_shape) %>%
mutate(medianLength = median(log(length_min))) %>%
ggplot(aes(x=reorder(UFO_shape, medianLength),y=log(length_min),fill=UFO_shape)) +
geom_boxplot(colour='black',size=.4,alpha=.5) +
geom_jitter(shape=16,position=position_jitter(0.2),size=.25) +
coord_flip() + theme_fivethirtyeight() +
scale_fill_manual(values=viridis::inferno(length(unique(df$UFO_shape)))) +
theme(legend.position='right',legend.direction='vertical') +
ggtitle('Log(duration) [min] vs. UFO shape')
flash
, fireball
, ie quick eventsdiamond
, sphere
, cone
History :