Import the necessary libraries

library(ISLR)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Load the data and convert the necessary variables into factors

# List the data sets shipped with ISLR, take a working copy of Smarket,
# and show its structure.
data(package = "ISLR")
stock <- ISLR::Smarket
str(stock)
## 'data.frame':    1250 obs. of  9 variables:
##  $ Year     : num  2001 2001 2001 2001 2001 ...
##  $ Lag1     : num  0.381 0.959 1.032 -0.623 0.614 ...
##  $ Lag2     : num  -0.192 0.381 0.959 1.032 -0.623 ...
##  $ Lag3     : num  -2.624 -0.192 0.381 0.959 1.032 ...
##  $ Lag4     : num  -1.055 -2.624 -0.192 0.381 0.959 ...
##  $ Lag5     : num  5.01 -1.055 -2.624 -0.192 0.381 ...
##  $ Volume   : num  1.19 1.3 1.41 1.28 1.21 ...
##  $ Today    : num  0.959 1.032 -0.623 0.614 0.213 ...
##  $ Direction: Factor w/ 2 levels "Down","Up": 2 2 1 2 2 2 1 2 2 2 ...
# Add a factor version of Year next to the numeric column.
stock[["year_C"]] <- as.factor(stock[["Year"]])

1. Number of positive, negative, and zero values in Lag1

# Tag every row with the sign of Lag1, then tally the rows per tag.
stock_count <- stock %>%
  mutate(
    category = ifelse(
      Lag1 == 0,
      "Zero",
      ifelse(Lag1 > 0, "Positive", "Negative")
    )
  )
stock_count %>%
  count(category)

2. The top 20% of volume traded

# 80th percentile of Volume: values above it fall in the top 20%.
top_20 <- quantile(stock[["Volume"]], probs = 0.8)
top_20
##      80% 
## 1.704914

3. Number of days the market direction is Up / Down

# Group by Direction and count the rows in each group.
stock_down <- group_by(stock, Direction)
stock_down %>%
  count()

4. The median of Today (the percentage return for the day)

# Median of the Today column over all rows.
Median <- median(stock[["Today"]])
Median
## [1] 0.0385

5.No of cases per year

# Group by Year and count the rows recorded for each year.
ncase <- group_by(stock, Year)
ncase %>%
  count()

6.Creating new data set which has only 2001 data

# Keep only the rows whose Year is 2001 and print them.
df_stock <- filter(stock, Year == 2001)
df_stock

7.The distributions of lag1, lag3, lag5

7(a).Distribution of Lag1

# Histogram of Lag1 using the default binning.
ggplot(data = stock, mapping = aes(x = Lag1)) +
  geom_histogram(colour = "white", fill = "pink") +
  labs(x = "Lag1", y = "Count", title = "Distribution of Lag1")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

7(b).Distribution of Lag3

# Histogram of Lag3 using the default binning.
ggplot(data = stock, mapping = aes(x = Lag3)) +
  geom_histogram(colour = "white", fill = "lightgreen") +
  labs(x = "Lag3", y = "Count", title = "Distribution of Lag3")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

7(c).Distribution of Lag5

# Histogram of Lag5 using the default binning.
ggplot(data = stock, mapping = aes(x = Lag5)) +
  geom_histogram(colour = "white", fill = "violet") +
  labs(x = "Lag5", y = "Count", title = "Distribution of Lag5")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

8.The distribution of percentage return for today based on Direction of the market

# Sum Today within each Direction and express every sum as a percentage of
# the overall sum.
# NOTE(review): presumably the "Down" and "Up" sums have opposite signs, so
# these shares can fall outside 0-100 -- confirm this is the intended measure.
percentage <- stock %>%
  group_by(Direction) %>%
  summarise(total = sum(Today)) %>%
  mutate(percentage = 100 * total / sum(total))
percentage

9.Year wise mean volume

# Bar chart of the percentage share held by each Direction.
# geom_col() draws the value already stored in `percentage`; geom_bar() with
# only an x aesthetic counts rows per x value, so every bar had height 1.
# The deprecated `size` argument is dropped.
ggplot(percentage, aes(x = Direction, y = percentage, fill = Direction)) +
  geom_col()

# Mean Volume per Year for rows with Year between 2001 and 2005 inclusive.
# NOTE(review): the result is stored under the name `mean`, which shadows
# base::mean as a variable -- consider renaming if nothing depends on it.
mean <- stock %>%
  filter(Year >= 2001, Year <= 2005) %>%
  group_by(Year) %>%
  summarise(mean_value = mean(Volume, na.rm = TRUE))
mean

10.Year wise 5 percentiles (10%, 30%, 50%, 70%, 90%) of percentage return of today

# Per-year 10th, 30th, 50th, 70th and 90th percentiles of Today.
# quantile() takes the requested probabilities through `probs`; the previous
# `to_find` argument was not recognised and was silently absorbed by `...`,
# so the default quartiles were returned instead.
# reframe() is used because every Year contributes five rows; the result is
# an ungrouped data frame with columns Year, prob and value.
percentile <- stock %>%
  filter(Year >= 2001 & Year <= 2005) %>%
  group_by(Year) %>%
  reframe(
    prob = c(0.1, 0.3, 0.5, 0.7, 0.9),
    value = quantile(Today, probs = prob, names = FALSE)
  )
percentile

11.The results of previous question in a tabular form

# Show the percentile results as a plain data frame.
percentile %>%
  data.frame()

12. Creating a new variable in the data set that classifies Lag1 as positive, zero, or negative

# Add a `category` column holding the sign of Lag1 as a label.
# sign() yields -1 / 0 / 1, so adding 2 indexes the label vector directly.
# NOTE(review): the result is stored under the name `count`, which shadows
# dplyr::count as a variable -- consider renaming if nothing depends on it.
count <- stock %>%
  mutate(category = c("Negative", "Zero", "Positive")[sign(Lag1) + 2])

13.Only for UP Direction,

13(a).Categorize Lag1 price based on its median

# Median of Lag1 over the rows where Direction is "Up".
# The bare column name is used so the median is taken from the filtered rows;
# `stock$Lag1` reached back to the unfiltered data frame and ignored the
# filter.
# NOTE(review): the heading asks to categorize Lag1 by its median; this step
# only reports the cut point.
stock %>%
  filter(Direction == "Up") %>%
  summarise(Med = median(Lag1))

13(b).Categorize Lag 1 price based on quartiles

# Quartiles of Lag1 over the rows where Direction is "Up".
# The bare column name is used so the quartiles come from the filtered rows;
# `stock$Lag1` reached back to the unfiltered data frame and ignored the
# filter. reframe() is used because the result has one row per quartile.
stock %>%
  filter(Direction == "Up") %>%
  reframe(
    prob = c(0, 0.25, 0.5, 0.75, 1),
    value = quantile(Lag1, probs = prob, names = FALSE)
  )

13(c).Categorize Lag 1 price based on suitable 5 percentiles

# Five percentiles (0%, 25%, 50%, 75%, 100%) of Lag1 over the rows where
# Direction is "Up". reframe() replaces summarise() because the result has
# one row per percentile, which summarise() no longer supports without a
# deprecation warning.
stock %>%
  filter(Direction == "Up") %>%
  reframe(
    prob = c(0, 0.25, 0.5, 0.75, 1),
    value = quantile(Lag1, probs = prob, names = FALSE)
  )