Please install the readr, ggplot2 and dplyr packages first, using install.packages("readr"), install.packages("ggplot2") and install.packages("dplyr").
# Load the tidyverse packages readr, ggplot2 and dplyr:
library(readr)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the Tesla daily stock history (2010 to 2020) from the CSV hosted on
# GitHub into a tibble; read_csv() guesses the column types from the data.
theUrl <- "https://raw.githubusercontent.com/kamathvk1982/Data607-MajorAssignment-Tidyverse/master/TSLA.csv"
tesla.hist.df <- read_csv(file = theUrl)
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
# Derive two per-day columns:
#   GainLoss        - close minus open for the same trading day
#   GainLossPercent - that gain/loss expressed as a percentage of the open
tesla.hist.df[["GainLoss"]] <- with(tesla.hist.df, Close - Open)
tesla.hist.df[["GainLossPercent"]] <- with(
  tesla.hist.df,
  (GainLoss / Open) * 100
)
# Show the last six rows of the dataset, including the two derived columns:
tail(tesla.hist.df, n = 6L)
ggplot2 is a system for declaratively creating graphics, based on The Grammar of Graphics. You provide the data, tell ggplot2 how to map variables to aesthetics, what graphical primitives to use, and it takes care of the details.
Here, ggplot2's geom_line() is used to show how the closing price of Tesla shares has moved over the years.
# Line chart of the daily closing price (y) against the trading date (x).
ggplot(data = tesla.hist.df, mapping = aes(x = Date, y = Close)) +
  geom_line()
dplyr's filter() keeps only the rows of a data set that satisfy one or more conditions.
Here, filter() is used to show the days on which the Tesla stock price moved by 15% or more (profit or loss) between open and close, listed from the largest gain to the largest loss.
# Keep the days whose open-to-close move was at least 15% in either
# direction, then list them from the largest gain down to the largest loss.
tesla.hist.df %>%
  filter(abs(GainLossPercent) >= 15) %>%
  arrange(desc(GainLossPercent))
dplyr's group_by() and summarise() compute aggregated values from a data set for one or more grouping columns.
Here, group_by() and summarise() are used to show the minimum and maximum closing price for each year, arranged in descending order of the relative price movement within the year.
# Yearly minimum and maximum closing price, sorted so that the year with the
# largest relative swing, (max - min) / min in percent, comes first.
# The year is derived from the piped data's own Date column instead of
# reaching back to the global tesla.hist.df, so the grouping stays aligned
# with the rows flowing through the pipeline, and the grouping column gets
# the readable name `Year`.
tesla.hist.df %>%
  group_by(Year = format(as.Date(Date), "%Y")) %>%
  summarise(min_close = min(Close), max_close = max(Close)) %>%
  # The percentage is computed inside desc() so the sort key reads as intended.
  arrange(desc((max_close - min_close) / min_close * 100))