The data contains all days, I’m only interested in mondays. So I filter out all the rows (observations that don’t have a “weeklyChange” (Data only stored on a monday))
weeks = tbl(dbObject,'Weeks')
aStock = tbl(dbObject, as.character(stockTables[2]))
currentStock = collect(aStock)
currentStock
## # A tibble: 2,730 × 10
## Week Date Open High Low Close Volume AdjClose
## <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 44 2016-11-03 110.98 111.46 109.55 109.83 26111000 109.8300
## 2 44 2016-11-02 111.40 112.35 111.23 111.59 28331700 111.0200
## 3 44 2016-11-01 113.46 113.77 110.53 111.49 43825800 110.9205
## 4 44 2016-10-31 113.65 114.23 113.20 113.54 26419400 112.9600
## 5 43 2016-10-28 113.87 115.21 113.45 113.72 37861700 113.1391
## 6 43 2016-10-27 115.39 115.86 114.10 114.48 34562000 113.8952
## 7 43 2016-10-26 114.31 115.70 113.31 115.59 66134200 114.9996
## 8 43 2016-10-25 117.95 118.36 117.31 118.25 48129000 117.6460
## 9 43 2016-10-24 117.10 117.74 117.00 117.65 23538700 117.0490
## 10 42 2016-10-21 116.81 116.91 116.28 116.60 23192700 116.0044
## # ... with 2,720 more rows, and 2 more variables: WeeklyChange <dbl>,
## # Sell <chr>
currentStock$Date = as.Date(currentStock$Date)
weeklyData = filter(currentStock, !WeeklyChange == is.na(WeeklyChange))
# Quick test if we can make a linear regression
hist(weeklyData$Open)

ggplot(currentStock, aes(Date, Open)) + geom_line() +
xlab("") + ylab("Opening") + geom_smooth(method = "lm", se = FALSE)

afterSplit = filter(weeklyData, Date >= as.Date('2014-07-01'))
simpleReg = lm(afterSplit$AdjClose~afterSplit$Date)
ggplot(data=afterSplit, aes(Open)) +
geom_histogram(
col="red",
fill="green",
alpha = .2) +
labs(title="Histogram for Apple's opening stock prices") +
labs(x="Price", y="Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(afterSplit, aes(x=Date, y=Open,color=Sell)) + geom_point() +
xlab("") + ylab("Opening") + geom_smooth(method = "lm", se = FALSE,color='orange') + geom_hline(yintercept=mean(afterSplit$Open))

beforeSplit = filter(weeklyData, Date <= as.Date('2014-06-025'))
ggplot(beforeSplit, aes(x=Date, y=Open,color=Sell)) + geom_line() +
xlab("") + ylab("Opening") + geom_smooth(method = "lm", se = FALSE,color='purple') + geom_hline(yintercept=mean(beforeSplit$Open))

probability = fractions(length(beforeSplit$Sell[beforeSplit$Sell == 'Short']) / length(beforeSplit$Sell[beforeSplit$Sell == 'Long']))
failures = 1
predictions = c()
for(L in 1:nrow(beforeSplit)){
if(beforeSplit$Sell[L] == 'Long'){
failures = failures+1
predictions[L] = round(dgeom(failures,probability),3)
}
else{
failures = 0
predictions[L] = round(probability,3)
}
}
beforeSplitWithProbability = cbind(beforeSplit,predictions)
ggplot(beforeSplit, aes(x=Date, y=Open,color=predictions)) + geom_point(size=2) +
xlab("") + ylab("Opening") + geom_smooth(method = "lm", se = FALSE,color='purple') + geom_hline(yintercept=mean(beforeSplit$Open)) + xlim(as.Date('2011-01-01'), as.Date('2013-01-01'))+scale_colour_gradient2()
## Warning: Removed 334 rows containing non-finite values (stat_smooth).
## Warning: Removed 334 rows containing missing values (geom_point).
