# Install ISLR only when it is missing, so that re-running the script does not
# re-download the package (or fail when no CRAN mirror is reachable).
if (!requireNamespace("ISLR", quietly = TRUE)) {
  install.packages("ISLR")
}

# Load the ISLR package and the Smarket dataset
library(ISLR)
data(Smarket)

# Make sure the response column is stored as a factor
Smarket[["Direction"]] <- as.factor(Smarket[["Direction"]])

# Logistic regression of Direction on the five lagged returns and Volume,
# fitted on every row of Smarket
glm_fits <- glm(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  family = binomial,
  data = Smarket
)

# Print the coefficient table, deviances and AIC
summary(glm_fits)
## 
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + 
##     Volume, family = binomial, data = Smarket)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.126000   0.240736  -0.523    0.601
## Lag1        -0.073074   0.050167  -1.457    0.145
## Lag2        -0.042301   0.050086  -0.845    0.398
## Lag3         0.011085   0.049939   0.222    0.824
## Lag4         0.009359   0.049974   0.187    0.851
## Lag5         0.010313   0.049511   0.208    0.835
## Volume       0.135441   0.158360   0.855    0.392
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1731.2  on 1249  degrees of freedom
## Residual deviance: 1727.6  on 1243  degrees of freedom
## AIC: 1741.6
## 
## Number of Fisher Scoring iterations: 3
# Coefficient estimates only
coef(glm_fits)
##  (Intercept)         Lag1         Lag2         Lag3         Lag4         Lag5 
## -0.126000257 -0.073073746 -0.042301344  0.011085108  0.009358938  0.010313068 
##       Volume 
##  0.135440659
# Full coefficient table (estimate, standard error, z value, p-value).
# The summary is computed once and queried with coef(); `$coef` only worked
# through partial matching of the `coefficients` element.
glm_summary <- summary(glm_fits)
coef(glm_summary)
##                 Estimate Std. Error    z value  Pr(>|z|)
## (Intercept) -0.126000257 0.24073574 -0.5233966 0.6006983
## Lag1        -0.073073746 0.05016739 -1.4565986 0.1452272
## Lag2        -0.042301344 0.05008605 -0.8445733 0.3983491
## Lag3         0.011085108 0.04993854  0.2219750 0.8243333
## Lag4         0.009358938 0.04997413  0.1872757 0.8514445
## Lag5         0.010313068 0.04951146  0.2082966 0.8349974
## Volume       0.135440659 0.15835970  0.8552723 0.3924004
# p-values only, selected by column name rather than by position
coef(glm_summary)[, "Pr(>|z|)"]
## (Intercept)        Lag1        Lag2        Lag3        Lag4        Lag5 
##   0.6006983   0.1452272   0.3983491   0.8243333   0.8514445   0.8349974 
##      Volume 
##   0.3924004
# Fitted probabilities on the response scale for the rows used in the fit
glm_probs <- predict(glm_fits, type = "response")
head(glm_probs, 10)
##         1         2         3         4         5         6         7         8 
## 0.5070841 0.4814679 0.4811388 0.5152224 0.5107812 0.5069565 0.4926509 0.5092292 
##         9        10 
## 0.5176135 0.4888378
# Dummy coding of the response: shows which level is coded as 1
contrasts(Smarket[["Direction"]])
##      Up
## Down  0
## Up    1
# Classify each day as "Up" when the fitted probability exceeds 0.5.
# The labels must match the levels of Smarket$Direction exactly ("Down"/"Up",
# no padding spaces); padded labels make every comparison below FALSE.
# ifelse() also removes the hard-coded vector length.
glm_pred <- ifelse(glm_probs > 0.5, "Up", "Down")
# Confusion matrix: rows are predictions, columns are the observed direction
table(glm_pred, Smarket$Direction)
##         
## glm_pred Down  Up
##     Down  145 141
##     Up    457 507
# Training accuracy read off the diagonal of the table
(507 + 145) / 1250
## [1] 0.5216
# Same accuracy computed directly from the predictions
mean(glm_pred == Smarket$Direction)
## [1] 0.5216
# Logical mask: TRUE for observations dated before 2005
train <- Smarket[["Year"]] < 2005
# Rows outside the training mask form the hold-out set
Smarket_2005 <- Smarket[!train, , drop = FALSE]
dim(Smarket_2005)
## [1] 252   9
# ISLR is already attached and Smarket already loaded at the top of the
# script, so nothing is reinstalled or reloaded here.
colnames(Smarket)
## [1] "Year"      "Lag1"      "Lag2"      "Lag3"      "Lag4"      "Lag5"     
## [7] "Volume"    "Today"     "Direction"
# Observed direction for the rows that are not in the training mask
Direction_2005 <- Smarket[["Direction"]][!train]
# Class counts in the hold-out set
table(Direction_2005)
## Direction_2005
## Down   Up 
##  111  141
# Hold-out sample size
length(Direction_2005)
## [1] 252
# First few hold-out labels
head(Direction_2005)
## [1] Down Down Down Up   Down Up  
## Levels: Down Up
# Refit the six-predictor model using only the rows where `train` is TRUE
glm_fits <- glm(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  family = binomial,
  data = Smarket,
  subset = train
)
# Predicted probabilities for the hold-out rows
glm_probs <- predict(glm_fits, newdata = Smarket_2005, type = "response")
# Peek at the first predicted probabilities
head(glm_probs)
##       999      1000      1001      1002      1003      1004 
## 0.5282195 0.5156688 0.5226521 0.5138543 0.4983345 0.5010912
# Turn hold-out probabilities into class labels at the 0.5 threshold.
# Labels are written exactly as the factor levels ("Down"/"Up"); padded
# strings such as " Up " never compare equal to Direction_2005.
glm_pred <- ifelse(glm_probs > 0.5, "Up", "Down")
# Confusion matrix: rows are predictions, columns are the observed direction
table(glm_pred, Direction_2005)
##         Direction_2005
## glm_pred Down Up
##     Down   77 97
##     Up     34 44
# Hold-out accuracy: (77 + 44) / 252
mean(glm_pred == Direction_2005)
## [1] 0.4801587
# Hold-out error rate: 1 - accuracy
mean(glm_pred != Direction_2005)
## [1] 0.5198413
# Self-contained evaluation of the full model on the 2005 hold-out data.

# Load necessary library and dataset
library(ISLR)
data(Smarket)

# Convert Direction to a factor
Smarket$Direction <- as.factor(Smarket$Direction)

# Training mask (years before 2005) and its complement (2005 and later)
train <- Smarket$Year < 2005
test <- !train

# Fit the logistic regression on the training rows only
glm_fits <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
                data = Smarket,
                family = binomial,
                subset = train)

# Predicted probabilities for the test rows
glm_probs <- predict(glm_fits, Smarket[test, ], type = "response")

# Convert probabilities to class labels (threshold = 0.5)
glm_pred <- ifelse(glm_probs > 0.5, "Up", "Down")

# Use the same factor levels as the response so the comparison is like-for-like
glm_pred <- factor(glm_pred, levels = levels(Smarket$Direction))

# Share of test days whose direction was predicted correctly
accuracy <- mean(glm_pred == Smarket$Direction[test])
print(accuracy)
## [1] 0.4801587
# Reduced model: keep only Lag1 and Lag2 and fit on the training rows.
# The model is refitted here so that the table, the precision figure and the
# new-data predictions below all refer to the same two-predictor fit.
glm_fits <- glm(Direction ~ Lag1 + Lag2,
                data = Smarket, family = binomial, subset = train)
glm_probs <- predict(glm_fits, Smarket[!train, ], type = "response")

# Class labels at the 0.5 threshold, written exactly as the factor levels
# ("Down"/"Up"); padded strings never compare equal to Direction_2005.
glm_pred <- ifelse(glm_probs > 0.5, "Up", "Down")
# Confusion matrix: rows are predictions, columns are the observed direction
table(glm_pred, Direction_2005)
##         Direction_2005
## glm_pred Down  Up
##     Down   35  35
##     Up     76 106
# Hold-out accuracy: (35 + 106) / 252
mean(glm_pred == Direction_2005)
## [1] 0.5595238
# Precision on days predicted "Up": correct "Up" calls / all "Up" calls
106 / (106 + 76)
## [1] 0.5824176

# New observations to score. Only the predictors used by the fitted model are
# needed, and values should stay within the range seen in the training data;
# inputs far outside it push the predicted probability to 0 or 1.
new_data <- data.frame(
  Lag1 = c(1.2, 1.5),
  Lag2 = c(1.1, -0.8)
)

# Predicted probabilities on the response scale
glm_probs_new <- predict(glm_fits, newdata = new_data, type = "response")

# View the predicted probabilities
print(glm_probs_new)
##         1         2 
## 0.4791462 0.4960939