Data Loading and Preparation
# Load necessary libraries
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the merged CSV from disk into a base data.frame named `data`
data <- read.csv(file = "C:/Users/aiden/OneDrive/mergedfile.csv")
# Show a compact per-column overview (name, type, leading values)
str(object = data)
## 'data.frame': 352097 obs. of 23 variables:
## $ Date : chr "2010-01-04" "2010-01-05" "2010-01-06" "2010-01-07" ...
## $ Symbol : chr "MMM" "MMM" "MMM" "MMM" ...
## $ Adj.Close : num 44.3 44 44.6 44.6 44.9 ...
## $ Close : num 69.4 69 70 70 70.5 ...
## $ High : num 69.8 69.6 70.7 70 70.5 ...
## $ Low : num 69.1 68.3 69.8 68.7 69.6 ...
## $ Open : num 69.5 69.2 70.1 69.7 70 ...
## $ Volume : num 3640265 3405012 6301126 5346240 4073337 ...
## $ Exchange : chr "NYQ" "NYQ" "NYQ" "NYQ" ...
## $ Shortname : chr "3M Company" "3M Company" "3M Company" "3M Company" ...
## $ Longname : chr "3M Company" "3M Company" "3M Company" "3M Company" ...
## $ Sector : chr "Industrials" "Industrials" "Industrials" "Industrials" ...
## $ Industry : chr "Conglomerates" "Conglomerates" "Conglomerates" "Conglomerates" ...
## $ Currentprice : num 131 131 131 131 131 ...
## $ Marketcap : num 7.17e+10 7.17e+10 7.17e+10 7.17e+10 7.17e+10 ...
## $ Ebitda : num 7.35e+09 7.35e+09 7.35e+09 7.35e+09 7.35e+09 ...
## $ Revenuegrowth : num -0.004 -0.004 -0.004 -0.004 -0.004 -0.004 -0.004 -0.004 -0.004 -0.004 ...
## $ City : chr "Saint Paul" "Saint Paul" "Saint Paul" "Saint Paul" ...
## $ State : chr "MN" "MN" "MN" "MN" ...
## $ Country : chr "United States" "United States" "United States" "United States" ...
## $ Fulltimeemployees : num 85000 85000 85000 85000 85000 85000 85000 85000 85000 85000 ...
## $ Longbusinesssummary: chr "3M Company provides diversified technology services in the United States and internationally. The company's Saf"| __truncated__ "3M Company provides diversified technology services in the United States and internationally. The company's Saf"| __truncated__ "3M Company provides diversified technology services in the United States and internationally. The company's Saf"| __truncated__ "3M Company provides diversified technology services in the United States and internationally. The company's Saf"| __truncated__ ...
## $ Weight : num 0.00137 0.00137 0.00137 0.00137 0.00137 ...
Selecting and Preparing the Binary Variable
# Create a binary Profit indicator: 1 when a stock's adjusted close is higher
# than that same stock's previous observation, 0 otherwise.
#
# The lag is taken within each Symbol. Sorting the pooled data by Date alone
# interleaves all tickers, so an ungrouped lag() would compare one company's
# price with a different company's price. Date is an ISO "YYYY-MM-DD" string,
# so sorting it as text is chronological.
#
# Only rows where Profit is undefined (the first observation of each symbol,
# or a missing price) are dropped. na.omit(data) would also discard rows that
# have an NA in any unrelated column.
#
# NOTE: rows are now ordered by Symbol, then Date. Outputs recorded from
# earlier renders were produced with the ungrouped lag and will change once
# the report is re-knit.
data <- data %>%
  group_by(Symbol) %>%
  arrange(Date, .by_group = TRUE) %>%
  mutate(Profit = ifelse(Adj.Close > dplyr::lag(Adj.Close), 1, 0)) %>%
  ungroup() %>%
  filter(!is.na(Profit)) %>%
  as.data.frame()
# Check distribution of the binary variable
table(data$Profit)
##
## 0 1
## 147250 158763
Building the Logistic Regression Model
# Fit a binomial (logit-link) GLM of Profit on trading volume, sector and
# market capitalisation
model <- glm(
  formula = Profit ~ Volume + Sector + Marketcap,
  family = binomial,
  data = data
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the coefficient table, deviances and AIC of the fitted model
print(summary(model))
##
## Call:
## glm(formula = Profit ~ Volume + Sector + Marketcap, family = binomial,
## data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.749e+00 3.286e-02 53.23 <2e-16 ***
## Volume -4.779e-08 5.803e-10 -82.35 <2e-16 ***
## SectorCommunication Services -5.168e+00 5.569e-02 -92.82 <2e-16 ***
## SectorConsumer Cyclical -2.025e+00 3.428e-02 -59.08 <2e-16 ***
## SectorConsumer Defensive -1.415e+00 3.609e-02 -39.19 <2e-16 ***
## SectorEnergy -1.917e+00 4.066e-02 -47.16 <2e-16 ***
## SectorFinancial Services -1.875e+00 3.449e-02 -54.35 <2e-16 ***
## SectorHealthcare -7.771e-01 3.441e-02 -22.58 <2e-16 ***
## SectorIndustrials -2.232e+00 3.473e-02 -64.26 <2e-16 ***
## SectorReal Estate -1.558e+00 3.570e-02 -43.64 <2e-16 ***
## SectorTechnology -1.489e+00 3.426e-02 -43.45 <2e-16 ***
## SectorUtilities -1.400e+00 3.578e-02 -39.12 <2e-16 ***
## Marketcap 1.953e-12 2.364e-14 82.60 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 423791 on 306012 degrees of freedom
## Residual deviance: 384362 on 306000 degrees of freedom
## AIC: 384388
##
## Number of Fisher Scoring iterations: 6
Building a Confidence Interval
# Coefficient table: estimate, standard error, z value and p-value per term
coefficients <- coef(summary(model))
# 95% Wald confidence intervals on the log-odds scale (estimate +/- z * SE).
# confint.default() is used instead of confint() so that no likelihood
# profiling is run on this large dataset.
conf_int <- confint.default(model, level = 0.95)
# Exponentiate estimates and interval bounds to express them as odds ratios
odds_ratios <- exp(cbind(OddsRatio = coef(model), conf_int))
conf_int
odds_ratios
coefficients
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.749200e+00 3.286442e-02 53.22473 0.000000e+00
## Volume -4.778809e-08 5.802962e-10 -82.35120 0.000000e+00
## SectorCommunication Services -5.168468e+00 5.568520e-02 -92.81583 0.000000e+00
## SectorConsumer Cyclical -2.025390e+00 3.427985e-02 -59.08398 0.000000e+00
## SectorConsumer Defensive -1.414653e+00 3.609385e-02 -39.19375 0.000000e+00
## SectorEnergy -1.917357e+00 4.065965e-02 -47.15627 0.000000e+00
## SectorFinancial Services -1.874775e+00 3.449349e-02 -54.35156 0.000000e+00
## SectorHealthcare -7.771192e-01 3.441175e-02 -22.58296 6.373448e-113
## SectorIndustrials -2.231867e+00 3.472993e-02 -64.26352 0.000000e+00
## SectorReal Estate -1.557737e+00 3.569590e-02 -43.63909 0.000000e+00
## SectorTechnology -1.488934e+00 3.426420e-02 -43.45451 0.000000e+00
## SectorUtilities -1.399544e+00 3.577622e-02 -39.11941 0.000000e+00
## Marketcap 1.952587e-12 2.363962e-14 82.59808 0.000000e+00
Insights and Further Investigation
# Summarize the fit using values read from the model object, so the sentence
# always matches the model that was actually estimated (the original text was
# an unfilled template placeholder).
coef_table <- coef(summary(model))
slope_p <- coef_table[rownames(coef_table) != "(Intercept)", "Pr(>|z|)"]
cat(sprintf(
  paste0(
    "The logistic regression model indicates that %d of %d non-intercept ",
    "coefficients are significant at the 5%% level; the residual deviance ",
    "is %.0f versus a null deviance of %.0f.\n"
  ),
  sum(slope_p < 0.05, na.rm = TRUE),
  length(slope_p),
  deviance(model),
  model$null.deviance
))
## The logistic regression model indicates that [explain your findings].
# Convert the Volume log-odds coefficient into a percent change in the odds.
# A one-unit change in Volume is negligible, so the effect is reported per
# one million units.
volume_beta <- coef(model)[["Volume"]]
cat(sprintf(
  paste0(
    "The coefficient for Volume suggests that for each one-million-unit ",
    "increase in volume, the odds of a positive price change (Profit = 1) ",
    "change by %+.2f%%, holding Sector and Marketcap fixed.\n"
  ),
  100 * (exp(volume_beta * 1e6) - 1)
))
## The coefficient for [Volume] suggests that for each unit increase in volume, the odds of [explain interpretation].
# Concrete follow-ups in place of the unfilled template placeholder; the
# columns named here are present in the loaded dataset.
cat(paste0(
  "Further analysis could explore lagged per-symbol returns, additional ",
  "fundamentals already in the dataset (Ebitda, Revenuegrowth, Weight), ",
  "and out-of-sample validation of the fitted model.\n"
))
## Further analysis could explore [additional variables or other insights].