Importing Libraries
library(ISLR)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# List the data sets bundled with ISLR, then take a working copy of Smarket
# and show its structure.
utils::data(package = "ISLR")
st_mr <- ISLR::Smarket
utils::str(st_mr)
## 'data.frame': 1250 obs. of 9 variables:
## $ Year : num 2001 2001 2001 2001 2001 ...
## $ Lag1 : num 0.381 0.959 1.032 -0.623 0.614 ...
## $ Lag2 : num -0.192 0.381 0.959 1.032 -0.623 ...
## $ Lag3 : num -2.624 -0.192 0.381 0.959 1.032 ...
## $ Lag4 : num -1.055 -2.624 -0.192 0.381 0.959 ...
## $ Lag5 : num 5.01 -1.055 -2.624 -0.192 0.381 ...
## $ Volume : num 1.19 1.3 1.41 1.28 1.21 ...
## $ Today : num 0.959 1.032 -0.623 0.614 0.213 ...
## $ Direction: Factor w/ 2 levels "Down","Up": 2 2 1 2 2 2 1 2 2 2 ...
# Keep a factor copy of Year alongside the numeric column.
st_mr[["year_C"]] <- as.factor(st_mr[["Year"]])
In Lag1, how many elements are positive, negative, or zero?
# Label each Lag1 value by sign, then tally the rows per label.
# The zero case is tested explicitly so a missing Lag1 stays NA.
stock_count <- count(
  mutate(
    st_mr,
    category = case_when(
      Lag1 > 0 ~ "Positive",
      Lag1 < 0 ~ "Negative",
      Lag1 == 0 ~ "Zero"
    )
  ),
  category
)
stock_count
What volume threshold marks the top 20% of Volume (the 80th percentile)?
# 80th percentile of Volume: values above this cutoff are in the top 20%.
top_20 <- with(st_mr, quantile(Volume, probs = 0.8))
top_20
## 80%
## 1.704914
On how many days was the market direction Up, and on how many was it Down?
# One row per Direction level with its row count in column `n`.
stock_up <- count(st_mr, Direction)
stock_up
State the number of cases per year
# Row count for every Year, stored in column `new_case`.
case_per_year <- summarise(
  group_by(st_mr, Year),
  new_case = n()
)
case_per_year
Create a dataset with only 2001 data
# Subset containing only the rows whose Year is 2001.
new_stock <- filter(st_mr, Year == 2001)
new_stock
Distribution of Lag1, Lag3, and Lag5
Lag1
# Histogram of Lag1 using bins 0.5 wide.
ggplot(data = st_mr, mapping = aes(x = Lag1)) +
  geom_histogram(binwidth = 0.5, colour = "white", fill = "brown") +
  ggtitle("Histogram of Lag1") +
  xlab("Lag1") +
  ylab("Count")

Lag3
# Histogram of Lag3 using bins 0.5 wide.
ggplot(data = st_mr, mapping = aes(x = Lag3)) +
  geom_histogram(binwidth = 0.5, colour = "white", fill = "skyblue") +
  ggtitle("Histogram of Lag3") +
  xlab("Lag3") +
  ylab("Count")

Lag5
# Histogram of Lag5 using bins 0.5 wide.
ggplot(data = st_mr, mapping = aes(x = Lag5)) +
  geom_histogram(binwidth = 0.5, colour = "white", fill = "darkgreen") +
  ggtitle("Histogram of Lag5") +
  xlab("Lag5") +
  ylab("Count")

Distribution of Today's percentage return by the direction of
the market
# Summarise Today's return by market direction.
# `total` keeps the signed sum of Today within each Direction.
# `percent` is each direction's share of the combined absolute return.
# The Down and Up totals have opposite signs, so dividing by their signed sum
# (which can be close to zero) gave shares that were not bounded to 0-100%
# and included a negative value. Normalising on absolute totals yields shares
# that are non-negative and add up to 100.
per_dis <- st_mr %>%
  group_by(Direction) %>%
  summarise(total = sum(Today)) %>%
  mutate(percent = abs(total) * 100 / sum(abs(total)))
per_dis
# Bar chart of the shares; geom_col() draws bars at the supplied heights.
ggplot(per_dis, aes(x = Direction, y = percent, fill = Direction)) +
  geom_col() +
  labs(
    title = "Percentage Return by Market Direction",
    x = "Direction",
    y = "Percentage"
  )

Compute year-wise mean volume
# Mean Volume per Year, restricted to 2001 through 2005 inclusive.
# Missing Volume values are ignored when averaging.
yearly_mean <- summarise(
  group_by(filter(st_mr, Year >= 2001, Year <= 2005), Year),
  mean_value = mean(Volume, na.rm = TRUE)
)
yearly_mean
Compute five year-wise percentiles (10%, 30%, 50%, 70%, 90%) of Today's
return
# For each Year from 2001 through 2005, compute the 10th, 30th, 50th, 70th
# and 90th percentiles of Today, one column per percentile.
yearly_percentile <- st_mr %>%
  filter(Year >= 2001, Year <= 2005) %>%
  group_by(Year) %>%
  summarise(
    Q10 = quantile(Today, probs = 0.1),
    Q30 = quantile(Today, probs = 0.3),
    Q50 = quantile(Today, probs = 0.5),
    Q70 = quantile(Today, probs = 0.7),
    Q90 = quantile(Today, probs = 0.9)
  )
yearly_percentile
Create a new variable for classifying Lag1 as Positive or
Zero/Negative
# Add `category` to st_mr: "Positive" where Lag1 is above zero, otherwise
# "Zero/Negative". Then preview the first rows.
st_mr <- mutate(
  st_mr,
  category = ifelse(test = Lag1 > 0, yes = "Positive", no = "Zero/Negative")
)
head(st_mr)
Median and quantiles of Lag1 for days when the market direction is Up
# Median of Lag1 over the rows where Direction is "Up".
summarise(filter(st_mr, Direction == "Up"), Median = median(Lag1))
# Default quantiles of Lag1 for the same rows, held in a list column.
summarise(
  filter(st_mr, Direction == "Up"),
  All_Quantiles = list(quantile(Lag1))
)
# The same rows with the probabilities given explicitly (0, 0.25, ..., 1).
summarise(
  filter(st_mr, Direction == "Up"),
  Custom_Quantiles = list(quantile(Lag1, probs = seq(0, 1, by = 0.25)))
)