# Install ISLR only when it is missing, so that re-running the script does
# not reinstall a package that is already available.
if (!requireNamespace("ISLR", quietly = TRUE)) {
  install.packages("ISLR")
}
# Attach ISLR once and load the Smarket dataset it provides
library(ISLR)
data(Smarket)
# Make sure Direction is a factor; as.factor() leaves an existing factor
# unchanged, so this is safe to run repeatedly.
Smarket$Direction <- as.factor(Smarket$Direction)
# Logistic regression of Direction on Lag1-Lag5 and Volume, estimated on
# every row of Smarket. Arguments are named explicitly in the order glm()
# declares them.
glm_fits <- glm(
  formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  family = binomial,
  data = Smarket
)
# Coefficient table, deviances and AIC for the fitted model
summary(object = glm_fits)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial, data = Smarket)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.126000 0.240736 -0.523 0.601
## Lag1 -0.073074 0.050167 -1.457 0.145
## Lag2 -0.042301 0.050086 -0.845 0.398
## Lag3 0.011085 0.049939 0.222 0.824
## Lag4 0.009359 0.049974 0.187 0.851
## Lag5 0.010313 0.049511 0.208 0.835
## Volume 0.135441 0.158360 0.855 0.392
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1731.2 on 1249 degrees of freedom
## Residual deviance: 1727.6 on 1243 degrees of freedom
## AIC: 1741.6
##
## Number of Fisher Scoring iterations: 3
# Compute the model summary once and reuse it for every extraction below
# instead of calling summary() again for each one.
glm_summary <- summary(glm_fits)
glm_summary
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial, data = Smarket)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.126000 0.240736 -0.523 0.601
## Lag1 -0.073074 0.050167 -1.457 0.145
## Lag2 -0.042301 0.050086 -0.845 0.398
## Lag3 0.011085 0.049939 0.222 0.824
## Lag4 0.009359 0.049974 0.187 0.851
## Lag5 0.010313 0.049511 0.208 0.835
## Volume 0.135441 0.158360 0.855 0.392
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1731.2 on 1249 degrees of freedom
## Residual deviance: 1727.6 on 1243 degrees of freedom
## AIC: 1741.6
##
## Number of Fisher Scoring iterations: 3
# Point estimates as a named numeric vector
coef(glm_fits)
## (Intercept) Lag1 Lag2 Lag3 Lag4 Lag5
## -0.126000257 -0.073073746 -0.042301344 0.011085108 0.009358938 0.010313068
## Volume
## 0.135440659
# Full coefficient matrix (estimate, standard error, z value, p-value).
# coef() is used rather than `$coef`, which only resolves through partial
# matching of the `coefficients` component.
coef(glm_summary)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.126000257 0.24073574 -0.5233966 0.6006983
## Lag1 -0.073073746 0.05016739 -1.4565986 0.1452272
## Lag2 -0.042301344 0.05008605 -0.8445733 0.3983491
## Lag3 0.011085108 0.04993854 0.2219750 0.8243333
## Lag4 0.009358938 0.04997413 0.1872757 0.8514445
## Lag5 0.010313068 0.04951146 0.2082966 0.8349974
## Volume 0.135440659 0.15835970 0.8552723 0.3924004
# p-values only, selected by column name instead of by position
coef(glm_summary)[, "Pr(>|z|)"]
## (Intercept) Lag1 Lag2 Lag3 Lag4 Lag5
## 0.6006983 0.1452272 0.3983491 0.8243333 0.8514445 0.8349974
## Volume
## 0.3924004
# Fitted probabilities on the response scale; with no new data supplied,
# predict() returns one value per row used to fit glm_fits.
glm_probs <- predict(object = glm_fits, type = "response")
# First ten fitted probabilities
head(glm_probs, n = 10)
## 1 2 3 4 5 6 7 8
## 0.5070841 0.4814679 0.4811388 0.5152224 0.5107812 0.5069565 0.4926509 0.5092292
## 9 10
## 0.5176135 0.4888378
# Dummy coding of Direction: "Up" is coded 1, so the probabilities above
# are probabilities of "Up".
contrasts(Smarket[["Direction"]])
## Up
## Down 0
## Up 1
# Classify each observation as "Up" when its fitted probability exceeds 0.5,
# otherwise "Down". The labels must match the levels of Direction exactly:
# padded strings such as " Up " never compare equal to "Up" and make every
# accuracy computation return 0.
glm_pred <- factor(
  ifelse(glm_probs > 0.5, "Up", "Down"),
  levels = levels(Smarket$Direction)
)
# Confusion matrix: rows are predictions, columns are observed directions
conf_mat <- table(glm_pred, Smarket$Direction)
conf_mat
##
## glm_pred Down Up
## Down 145 141
## Up 457 507
# Share of correct predictions, taken from the diagonal of the table rather
# than from numbers copied by hand
sum(diag(conf_mat)) / sum(conf_mat)
## [1] 0.5216
# Same quantity computed directly from the two vectors
mean(glm_pred == Smarket$Direction)
## [1] 0.5216
# Logical mask over the rows of Smarket: TRUE where Year is before 2005
train <- Smarket[["Year"]] < 2005
# Held-out data: every row the mask does not select
Smarket_2005 <- Smarket[!train, , drop = FALSE]
# Rows and columns of the held-out data
dim(Smarket_2005)
## [1] 252 9
# ISLR is installed and attached, and Smarket is loaded, at the top of the
# script; repeating install.packages()/library()/data() here is unnecessary.
# Column names of the dataset, for reference
colnames(Smarket)
## [1] "Year" "Lag1" "Lag2" "Lag3" "Lag4" "Lag5"
## [7] "Volume" "Today" "Direction"
# Observed directions for the held-out rows (those where train is FALSE)
Direction_2005 <- Smarket$Direction[!train]
table(Direction_2005) # Count occurrences of "Up" and "Down"
## Direction_2005
## Down Up
## 111 141
length(Direction_2005) # Number of observations
## [1] 252
head(Direction_2005) # Preview the first few values
## [1] Down Down Down Up Down Up
## Levels: Down Up
# Refit the full model using only the training rows (train == TRUE)
glm_fits <- glm(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  data = Smarket,
  family = binomial,
  subset = train
)
# Predicted probabilities of "Up" for the held-out rows
glm_probs <- predict(glm_fits, newdata = Smarket_2005, type = "response")
head(glm_probs) # Check predicted probabilities
## 999 1000 1001 1002 1003 1004
## 0.5282195 0.5156688 0.5226521 0.5138543 0.4983345 0.5010912
# Threshold at 0.5. The labels must match the factor levels exactly; padded
# strings such as " Up " never equal "Up" and yield an accuracy of 0. The
# length follows glm_probs instead of a hard-coded row count.
glm_pred <- factor(
  ifelse(glm_probs > 0.5, "Up", "Down"),
  levels = levels(Direction_2005)
)
# Confusion matrix: rows are predictions, columns are observed directions
table(glm_pred, Direction_2005)
## Direction_2005
## glm_pred Down Up
## Down 77 97
## Up 34 44
mean(glm_pred == Direction_2005) # Test accuracy: (77 + 44) / 252
## [1] 0.4801587
mean(glm_pred != Direction_2005) # Test error rate
## [1] 0.5198413
# Full model on the training rows, evaluated on the held-out rows ----

# Training rows are those before 2005; everything else is the test set
train <- Smarket$Year < 2005
test <- !train
# Fit logistic regression model on training data
glm_fits <- glm(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  data = Smarket,
  family = binomial,
  subset = train
)
# Predicted probabilities of "Up" for the test rows
glm_probs <- predict(glm_fits, Smarket[test, ], type = "response")
# Convert probabilities to class labels (threshold = 0.5), as a factor with
# the same levels as Direction so the comparison below is well defined
glm_pred <- factor(
  ifelse(glm_probs > 0.5, "Up", "Down"),
  levels = levels(Smarket$Direction)
)
# Test-set accuracy of the full model
accuracy <- mean(glm_pred == Smarket$Direction[test])
print(accuracy)
## [1] 0.4801587

# Reduced model: Lag1 and Lag2 only ----

# The reduced model gets its own name so that it is not overwritten by the
# full-model fit before its predictions are evaluated.
glm_fits_lag12 <- glm(
  Direction ~ Lag1 + Lag2,
  data = Smarket,
  family = binomial,
  subset = train
)
glm_probs <- predict(glm_fits_lag12, Smarket[test, ], type = "response")
# Labels must match the factor levels exactly; padded strings such as " Up "
# never equal "Up" and make the accuracy evaluate to 0.
glm_pred <- factor(
  ifelse(glm_probs > 0.5, "Up", "Down"),
  levels = levels(Direction_2005)
)
# Confusion matrix: rows are predictions, columns are observed directions
conf_mat <- table(glm_pred, Direction_2005)
conf_mat
## Direction_2005
## glm_pred Down Up
## Down 35 35
## Up 76 106
# Test-set accuracy of the reduced model
mean(glm_pred == Direction_2005)
## [1] 0.5595238
# Share of predicted "Up" days that were actually "Up", read from the table
# instead of typed in by hand
conf_mat["Up", "Up"] / sum(conf_mat["Up", ])
## [1] 0.5824176
# Predict for two hypothetical days with the model stored in glm_fits.
# Every predictor in the model formula needs a column in new_data.
new_data <- data.frame(
  Lag1 = c(1.2, 1.5),
  Lag2 = c(1.1, -0.8),
  Lag3 = c(0.5, -1.2),
  Lag4 = c(0.3, 0.4),
  Lag5 = c(0.6, 1.0),
  # Volume must be on the same scale as Smarket$Volume (billions of shares
  # according to ?Smarket). Values such as 1000 lie far outside the data and
  # saturate the model: the predictions collapse to 2.220446e-16, which is
  # .Machine$double.eps rather than a meaningful probability.
  Volume = c(1.0, 1.5)
)
# Warn when a supplied Volume falls outside the range the model was built on
volume_range <- range(Smarket$Volume)
if (any(new_data$Volume < volume_range[1] | new_data$Volume > volume_range[2])) {
  warning("new_data$Volume lies outside the range of Smarket$Volume",
          call. = FALSE)
}
# Get predicted probabilities (type = "response")
glm_probs_new <- predict(glm_fits, newdata = new_data, type = "response")
# View the predicted probabilities
print(glm_probs_new)
# NOTE(review): the previously recorded output (2.220446e-16 for both rows)
# belonged to the out-of-range Volume inputs; re-run to record fresh values.