By Sandy Sng
5 June 2018
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the historical quotes export; read.csv() already returns a data frame.
quotes_path <- "~/Desktop/R Files/HistoricalQuotes.csv"
GOOG <- read.csv(file = quotes_path)
# Peek at the first rows to see the raw layout.
head(x = GOOG)
## date close volume open high low
## 1 16:00 1,139.29 1,889,579 1,122.33 1,141.89 1,122.005
## 2 2018/06/04 1139.2900 1881046.0000 1122.3300 1141.8900 1122.0050
## 3 2018/06/01 1119.5000 2416755.0000 1099.3500 1120.0000 1098.5000
## 4 2018/05/31 1084.9900 3085325.0000 1067.5600 1097.1900 1067.5600
## 5 2018/05/30 1067.8000 1129958.0000 1063.0300 1069.2100 1056.8300
## 6 2018/05/29 1060.3200 1858676.0000 1064.8900 1073.3700 1055.2200
# Inspect the column types of the raw import. In the recorded output below,
# every column (including the prices) shows up as a Factor, which is why the
# price columns are converted to numeric later on.
str(GOOG)
## 'data.frame': 1056 obs. of 6 variables:
## $ date : Factor w/ 1056 levels "16:00","2014/03/27",..: 1 1056 1055 1054 1053 1052 1051 1050 1049 1048 ...
## $ close : Factor w/ 1039 levels "1,139.29","1001.5200",..: 1 134 124 99 79 72 87 92 94 82 ...
## $ volume: Factor w/ 1055 levels "1,889,579","1002313.0000",..: 1 626 805 886 81 613 996 980 35 54 ...
## $ open : Factor w/ 1022 levels "1,122.33","1001.5000",..: 1 124 111 81 75 77 91 90 79 97 ...
## $ high : Factor w/ 1034 levels "1,141.89","1016.1000",..: 1 132 119 102 72 76 87 82 83 92 ...
## $ low : Factor w/ 1039 levels "1,122.005","1000.6600",..: 1 125 109 88 73 71 95 84 78 86 ...
Remove 1st row: Incomplete data in row for 05-Jun-2018
Remove 3rd column: Volume of GOOG (irrelevant here)
Temporarily remove date column
New (cleaned) variable is GOOG2. Change all prices to numeric format.
# Build the cleaned price table GOOG2 from the raw quotes in GOOG:
#   * skip row 1 (the incomplete row for the current day),
#   * reverse the remaining rows so the oldest quote (2014) comes first,
#   * keep only the four price columns (volume and date are left out for now).
# Columns are selected by name rather than position so a change in the CSV
# layout fails loudly instead of silently dropping the wrong column, and
# seq_len() avoids the `n:1` trap, which counts down to 0:1 when n is 0.
price_cols <- c("close", "open", "high", "low")
keep_rows <- rev(seq_len(nrow(GOOG))[-1])
GOOG2 <- GOOG[keep_rows, price_cols]
names(GOOG2)
## [1] "close" "open" "high" "low"
# Convert every price column to numeric in one pass: strip the thousands
# separators (",") from the text representation, then parse as a number.
price_columns <- c("close", "open", "high", "low")
GOOG2[price_columns] <- lapply(
  GOOG2[price_columns],
  function(raw_price) as.numeric(gsub(",", "", raw_price))
)
# Check cleaned data
summary(GOOG2)
## close open high low
## Min. : 492.6 Min. : 494.6 Min. : 496.0 Min. : 487.6
## 1st Qu.: 571.2 1st Qu.: 571.3 1st Qu.: 576.6 1st Qu.: 565.5
## Median : 737.8 Median : 736.5 Median : 742.0 Median : 731.0
## Mean : 751.6 Mean : 751.6 Mean : 757.6 Mean : 745.2
## 3rd Qu.: 915.4 3rd Qu.: 915.4 3rd Qu.: 922.2 3rd Qu.: 909.6
## Max. :1175.8 Max. :1177.3 Max. :1186.9 Max. :1172.0
# Rebuild the date axis. GOOG2 no longer has a `date` column (it was dropped
# during cleaning), so `GOOG2$date` is NULL and the plot had no real x values.
# GOOG2 holds GOOG's rows minus the first, in reverse order, so the matching
# dates are GOOG$date without its first entry, reversed.
trade_dates <- rev(as.Date(as.character(GOOG$date[-1]), format = "%Y/%m/%d"))
stopifnot(length(trade_dates) == nrow(GOOG2))

# Reshape to long format: one row per (date, price type). Excluding `date`
# from gather() repeats each date for every price series, so x and y have the
# same length and stay aligned.
GOOGstock <- as.data.frame(GOOG2) %>%
  mutate(date = trade_dates) %>%
  gather(result, price, -date)

# One colour per price type (close/open/high/low), plotted against time.
plot_ly(GOOGstock, x = ~date, y = ~price, color = ~result)
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode