#import data
# NOTE(review): setwd() ties the script to one machine's directory layout; it
# is kept because later steps may rely on the working directory.
setwd("~/Desktop/Data Visualization")
# Read the daily bike-share CSV, parsing `dteday` as a Date written as
# month/day/two-digit-year.
bikeshare_data <- read_csv(
  "bikesharedailydata.csv",
  col_types = cols(
    dteday = col_date(format = "%m/%d/%y")
  )
)
#clean data
# Replace the abbreviated column names with descriptive ones in a single pass;
# any column not listed in the switch() keeps its current name.
names(bikeshare_data) <- vapply(
  names(bikeshare_data),
  function(nm) {
    switch(nm,
      instant = "instance",
      yr = "year",
      hum = "humidity",
      cnt = "count",
      mnth = "month",
      dteday = "date",
      nm
    )
  },
  character(1),
  USE.NAMES = FALSE
)
#check for NA's
# List the columns that contain at least one missing value. vapply() is used
# so the result is always a logical vector, whatever the input looks like.
names(bikeshare_data)[vapply(bikeshare_data, anyNA, logical(1))]
## [1] "season" "month"
# Row 7 has no season recorded; show it, patch it with 1, and show it again.
# NOTE(review): the row index and the replacement value are hard-coded;
# confirm them against the `date` column if the input file changes.
bikeshare_data$season[7]
## [1] NA
bikeshare_data$season[7] <- 1
bikeshare_data$season[7]
## [1] 1
# Same treatment for the missing month in row 10.
bikeshare_data$month[10]
## [1] NA
bikeshare_data$month[10] <- 1
bikeshare_data$month[10]
## [1] 1

#invent your own chart style

# Shared look for every chart: black-and-white base theme, serif text, no
# panel border, no grid lines, no tick marks, black axis lines, legend on top.
aesthetic <- theme_bw() +
  theme(
    text = element_text(family = "serif"),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "black"),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "top"
  )

# Accent colour reused by later charts.
chartcolor <- "#56B4E9"

#Barplot

# Bar plot of rentals by season. With stat = "identity" the daily counts are
# stacked within each season, so each bar's height is that season's total.
bar <- ggplot(bikeshare_data, aes(x = season, y = count)) +
  geom_bar(stat = "identity", fill = "green") +
  labs(
    title = "Bicycle Rentals per Season",
    subtitle = "Season 3 is most popular for bike rentals",
    caption = "Data Sourced from bikesharedailydata.csv",
    x = "Season",
    y = "Rentals"
  ) +
  aesthetic +
  # The `+` before this scale was previously missing, so the comma-formatted
  # y axis was evaluated as a stand-alone statement and never reached the plot.
  scale_y_continuous(labels = comma)
bar

#Line Chart

# Daily rentals over time drawn as a line with point markers. The x axis is
# limited to calendar year 2012 and has one tick per month.
linechart <- ggplot(bikeshare_data, aes(x = date, y = count, group = 1)) +
  geom_line(color = "sky blue") +
  geom_point(color = "sky blue", size = 1) +
  labs(
    title = "Line Chart for 2012",
    subtitle = "During the winter months there seems to be a clear drop in demand",
    caption = "Data Sourced from bikesharedailydata.csv",
    y = "Rental ",
    x = "Month-Year"
  ) +
  aesthetic +
  scale_y_continuous(labels = comma) +
  scale_x_date(
    date_breaks = "1 month",
    labels = date_format("%b-%y"),
    limits = c(as.Date("2012-01-01"), as.Date("2012-12-31"))
  )
linechart

#Stacked area chart

# Filled area of daily rentals across 2011-2012, with a tick every two months.
# The plot is not assigned, so it prints immediately.
ggplot(bikeshare_data, aes(x = date, y = count, group = 1)) +
  geom_area(fill = "lightblue2", color = chartcolor) +
  labs(
    title = "Area chart for 2011-2012",
    subtitle = "Warmer months in 2012 saw a much higher demand then warmer\nmonths in 2011",
    caption = "Data Sourced from bikesharedailydata.csv",
    y = "Rental ",
    x = "time of the year"
  ) +
  aesthetic +
  scale_y_continuous(labels = comma) +
  scale_x_date(
    date_breaks = "2 month",
    labels = date_format("%b-%y"),
    limits = c(as.Date("2011-01-01"), as.Date("2012-12-31"))
  )

#Histogram

# Distribution of daily rental totals in bins of width 300, with the number of
# days in each bin printed just above its bar.
ggplot(bikeshare_data, aes(x = count)) +
  geom_histogram(binwidth = 300, fill = "#56B489", color = "#56B489") +
  # after_stat(count) is the per-bin tally computed by stat_bin(); it replaces
  # the `..count..` notation deprecated in ggplot2 3.4.0.
  stat_bin(
    binwidth = 300,
    aes(y = after_stat(count), label = after_stat(count)),
    geom = "text",
    vjust = -.5
  ) +
  labs(
    title = "Frequency of bike Rentals",
    subtitle = "Majority of Daily Rentals seem to be between 4000-5000",
    caption = "Data Sourced from bikesharedailydata.csv",
    x = "Rentals",
    y = "Frequency (Days)"
  ) +
  aesthetic +
  scale_x_continuous(
    breaks = c(100, 500, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000),
    labels = c(".1K", ".5K", "1K", "2K", "3K", "4K", "5K", "6K", "7K", "8K", "9K")
  ) +
  # No fill scale is added: nothing is mapped to `fill` (the bars use a
  # constant colour), so a fill gradient would have no effect.
  scale_y_continuous(breaks = seq(0, 60, 5))

#Density plot

# Kept as a stand-alone vector in case later code refers to it; the plot
# itself now maps the `count` column directly.
RentalTotal <- bikeshare_data$count

# Kernel density of daily rental totals, with a dashed grey vertical line at
# the mean. Uses the shared `aesthetic` theme instead of repeating its
# settings inline (the plot has no legend, so the result is the same).
ggplot(bikeshare_data, aes(x = count)) +
  geom_density(fill = "#56B4E9", color = "#56B4E9") +
  labs(
    title = "Density plot",
    subtitle = "Resembles Normal Distribution - highest density roughly 4500-5000",
    caption = "Data sourced from bikesharedailydata.csv",
    x = "Rentals",
    y = "Density"
  ) +
  aesthetic +
  scale_x_continuous(labels = comma) +
  # `linewidth` replaces `size` for lines, which was deprecated in
  # ggplot2 3.4.0.
  geom_vline(
    xintercept = round(mean(bikeshare_data$count), 2),
    linewidth = 1,
    color = "#999999",
    linetype = "dashed"
  )

#boxplot
# Recompute the month from the parsed date and store it as a factor so ggplot
# treats it as a discrete axis. month() accepts the Date column directly, so
# no intermediate POSIXlt conversion is needed.
bikeshare_data$month <- as.factor(month(bikeshare_data$date))
rentmonths <- c("Jan", "Feb", "Mar", "April", "May", "June","July", "Aug", "Sept", "Oct", "Nov", "Dec")

# One box per month of daily rental counts; outliers are drawn as large red
# stars. A discrete x already yields one group per month, so no explicit
# `group` aesthetic is required.
# NOTE(review): this variable shares its name with base::boxplot(); the name
# is kept so any later reference to `boxplot` still resolves to this plot.
boxplot <- ggplot(bikeshare_data, aes(x = month, y = count)) +
  geom_boxplot(
    color = "#56B4E9",
    outlier.colour = "red",
    outlier.shape = 8,
    outlier.size = 4
  ) +
  aesthetic +
  # Labels are keyed by month number ("1".."12") so each label stays attached
  # to the right month even if some month has no rows.
  scale_x_discrete(labels = setNames(rentmonths, seq_along(rentmonths))) +
  labs(
    title = "Boxplot of bike rentals",
    subtitle = "July has the highest median rentals, March has the longest upper whisker",
    caption = "Data sourced from bikesharedailydata.csv",
    x = "Months",
    y = "Rentals count"
  )

boxplot

#ScatterPlot

# Daily rental count (x) against the `atemp` column (y), with geom_smooth()'s
# default smoother drawn over the points.
scatter <- ggplot(bikeshare_data, aes(x = count, y = atemp)) +
  geom_point(color = "darkgreen") +
  aesthetic +
  geom_smooth() +
  scale_x_continuous(
    breaks = c(100, 500, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000),
    labels = c(".1K", ".5K", "1K", "2K", "3K", "4K", "5K", "6K", "7K", "8K", "9K")
  ) +
  labs(
    title = "Scatter plot of bike rentals",
    subtitle = "Highest daily bike rental was near 9k when temperature was between 55\nand 60 degrees",
    caption = "Data sourced from bikesharedailydata.csv",
    y = "Temprature ",
    x = "Bike Rentals"
  )
scatter
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'