Libraries

library(ISLR)
library(dplyr)
# Load the Smarket data set and keep a working copy under the name `data`,
# which every later section reads from.
data(list = "Smarket")
data <- Smarket

1. Count of Positive, Negative, and Zero Elements in Lag1

# Tally how many Lag1 values are strictly positive, strictly negative, and
# exactly zero; the result is a single-row summary with one column per case.
summarise(
  data,
  Positive = sum(Lag1 > 0),
  Negative = sum(Lag1 < 0),
  Zero = sum(Lag1 == 0)
)
## # A tibble: 1 × 3
##   Positive Negative  Zero
##      <int>    <int> <int>
## 1      648      601     1

2. Threshold for the Top 20% of Volume Traded (80th Percentile)

# 80th percentile of Volume: observations above this value form the top 20%.
quantile(data[["Volume"]], probs = 0.8)
##      80% 
## 1.704914

3. Number of Days Market Direction is Up/Down

# Number of rows for each level of Direction.
count(data, Direction)
## # A tibble: 2 × 2
##   Direction     n
##   <fct>     <int>
## 1 Down        602
## 2 Up          648

4. Median Percentage Return for Today

# Median of the Today column, ignoring missing values.
summarise(data, Median_Today = median(Today, na.rm = TRUE))
## # A tibble: 1 × 1
##   Median_Today
##          <dbl>
## 1       0.0385

5. Number of Cases Per Year

# Count the observations recorded in each year. Year is already stored as a
# four-digit number, so it is counted as-is rather than being round-tripped
# through substr() and as.numeric(), which returned the same values.
data %>%
  count(Year)
## # A tibble: 5 × 2
##    Year     n
##   <dbl> <int>
## 1  2001   242
## 2  2002   252
## 3  2003   252
## 4  2004   252
## 5  2005   252

6. Dataset for 2001

# Keep only the rows whose Year is 2001.
data_2001 <- filter(data, Year == 2001)

7. Distribution of Lag1, Lag3, Lag5

library(ggplot2)
# One histogram per lag variable (Lag1, Lag3, Lag5). All three use the same
# bin width and transparency so the shapes can be compared directly; only the
# plotted column, fill colour, and title differ.
ggplot(data, aes(x = Lag1)) +
  geom_histogram(binwidth = 0.1, fill = "blue", alpha = 0.7) +
  ggtitle("Distribution of Lag1")

ggplot(data, aes(x = Lag3)) +
  geom_histogram(binwidth = 0.1, fill = "green", alpha = 0.7) +
  ggtitle("Distribution of Lag3")

ggplot(data, aes(x = Lag5)) +
  geom_histogram(binwidth = 0.1, fill = "red", alpha = 0.7) +
  ggtitle("Distribution of Lag5")

8. Distribution of Percentage Return for Today Based on Market Direction

# Overlaid density curves of Today, one per Direction level; the fill is
# semi-transparent so both curves stay visible where they overlap.
ggplot(data, aes(x = Today, fill = Direction)) +
  geom_density(alpha = 0.5) +
  ggtitle("Distribution of Percentage Return for Today Based on Market Direction")

9. Year-wise Mean Volume

# Mean of Volume within each Year, ignoring missing values.
summarise(
  group_by(data, Year),
  Mean_Volume = mean(Volume, na.rm = TRUE)
)
## # A tibble: 5 × 2
##    Year Mean_Volume
##   <dbl>       <dbl>
## 1  2001        1.23
## 2  2002        1.43
## 3  2003        1.38
## 4  2004        1.42
## 5  2005        1.92

10. Year-wise Percentiles of Percentage Return

# For each Year, the 10th, 30th, 50th, 70th and 90th percentiles of Today,
# ignoring missing values. Column names are the percentile labels.
summarise(
  group_by(data, Year),
  `10%` = quantile(Today, probs = 0.1, na.rm = TRUE),
  `30%` = quantile(Today, probs = 0.3, na.rm = TRUE),
  `50%` = quantile(Today, probs = 0.5, na.rm = TRUE),
  `70%` = quantile(Today, probs = 0.7, na.rm = TRUE),
  `90%` = quantile(Today, probs = 0.9, na.rm = TRUE)
)
## # A tibble: 5 × 6
##    Year  `10%`  `30%`   `50%` `70%` `90%`
##   <dbl>  <dbl>  <dbl>   <dbl> <dbl> <dbl>
## 1  2001 -1.72  -0.560 -0.0425 0.587 1.48 
## 2  2002 -2.14  -0.935 -0.178  0.626 1.85 
## 3  2003 -1.27  -0.438  0.128  0.58  1.38 
## 4  2004 -0.967 -0.257  0.0635 0.390 0.911
## 5  2005 -0.834 -0.313  0.056  0.350 0.843

11. Lag1 Categorization for Up Direction

# Label each "Up" day by whether its Lag1 value lies above the median Lag1 of
# all "Up" days. The comparison is strict, so values equal to the median are
# labelled "Below Median".
#
# The subset is stored in its own object (`data_up`) instead of being assigned
# back to `data`. Overwriting `data` here dropped every "Down" row for all code
# that runs afterwards, so later summaries were computed on "Up" days only.
# NOTE(review): outputs recorded further down that were produced from the
# overwritten `data` will change when the report is re-run.
median_lag1_up <- median(data$Lag1[data$Direction == "Up"], na.rm = TRUE)
data_up <- data %>%
  filter(Direction == "Up") %>%
  mutate(
    Lag1_Median_Category = ifelse(Lag1 > median_lag1_up, "Above Median", "Below Median")
  )
head(data_up$Lag1_Median_Category)
## [1] "Above Median" "Above Median" "Below Median" "Above Median" "Above Median"
## [6] "Below Median"
table(data_up$Lag1_Median_Category)
## 
## Above Median Below Median 
##          322          326

12. Mean of Today

# Mean of the Today column, ignoring missing values.
summarise(data, Mean_Today = mean(Today, na.rm = TRUE))
## # A tibble: 1 × 1
##   Mean_Today
##        <dbl>
## 1      0.803

13. Sum of Today Values from Row 16 to the Last Row

# Sum of Today from row 16 through the last row, ignoring missing values.
# tail(x, -15) drops the first 15 elements, so a table with fewer than 16 rows
# yields an empty selection (sum 0) instead of the reversed index sequence that
# 16:nrow(data) would produce.
sum(tail(data$Today, -15), na.rm = TRUE)
## [1] 508.47

14. Median of Today

# Median of the Today column, ignoring missing values.
summarise(data, Median_Today = median(Today, na.rm = TRUE))
## # A tibble: 1 × 1
##   Median_Today
##          <dbl>
## 1        0.582

15. Difference Between Today Values in Rows 16 and 17 (Row 17 Minus Row 16)

# Change in Today from row 16 to row 17 (row 17 minus row 16).
diff(data$Today[16:17])
## [1] 0.482