Data Loading and Preparation

# Load necessary libraries
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the merged CSV into a base data.frame named `data`
data <- utils::read.csv(file = "C:/Users/aiden/OneDrive/mergedfile.csv")

# Print each column's name, type and first few values
utils::str(data)
## 'data.frame':    352097 obs. of  23 variables:
##  $ Date               : chr  "2010-01-04" "2010-01-05" "2010-01-06" "2010-01-07" ...
##  $ Symbol             : chr  "MMM" "MMM" "MMM" "MMM" ...
##  $ Adj.Close          : num  44.3 44 44.6 44.6 44.9 ...
##  $ Close              : num  69.4 69 70 70 70.5 ...
##  $ High               : num  69.8 69.6 70.7 70 70.5 ...
##  $ Low                : num  69.1 68.3 69.8 68.7 69.6 ...
##  $ Open               : num  69.5 69.2 70.1 69.7 70 ...
##  $ Volume             : num  3640265 3405012 6301126 5346240 4073337 ...
##  $ Exchange           : chr  "NYQ" "NYQ" "NYQ" "NYQ" ...
##  $ Shortname          : chr  "3M Company" "3M Company" "3M Company" "3M Company" ...
##  $ Longname           : chr  "3M Company" "3M Company" "3M Company" "3M Company" ...
##  $ Sector             : chr  "Industrials" "Industrials" "Industrials" "Industrials" ...
##  $ Industry           : chr  "Conglomerates" "Conglomerates" "Conglomerates" "Conglomerates" ...
##  $ Currentprice       : num  131 131 131 131 131 ...
##  $ Marketcap          : num  7.17e+10 7.17e+10 7.17e+10 7.17e+10 7.17e+10 ...
##  $ Ebitda             : num  7.35e+09 7.35e+09 7.35e+09 7.35e+09 7.35e+09 ...
##  $ Revenuegrowth      : num  -0.004 -0.004 -0.004 -0.004 -0.004 -0.004 -0.004 -0.004 -0.004 -0.004 ...
##  $ City               : chr  "Saint Paul" "Saint Paul" "Saint Paul" "Saint Paul" ...
##  $ State              : chr  "MN" "MN" "MN" "MN" ...
##  $ Country            : chr  "United States" "United States" "United States" "United States" ...
##  $ Fulltimeemployees  : num  85000 85000 85000 85000 85000 85000 85000 85000 85000 85000 ...
##  $ Longbusinesssummary: chr  "3M Company provides diversified technology services in the United States and internationally. The company's Saf"| __truncated__ "3M Company provides diversified technology services in the United States and internationally. The company's Saf"| __truncated__ "3M Company provides diversified technology services in the United States and internationally. The company's Saf"| __truncated__ "3M Company provides diversified technology services in the United States and internationally. The company's Saf"| __truncated__ ...
##  $ Weight             : num  0.00137 0.00137 0.00137 0.00137 0.00137 ...

Selecting and Preparing the Binary Variable

# Create the binary outcome: Profit = 1 when a stock's adjusted close is higher
# than that same stock's previous observation, 0 otherwise.
#
# The table stacks many symbols, so the lag has to be taken within each Symbol;
# lagging over the whole date-sorted table would compare one company's price
# with a different company's price. Date is ISO "YYYY-MM-DD" text, so sorting
# the character column is chronological.
#
# Only rows where Profit is undefined (a symbol's first observation, or a
# missing Adj.Close) are removed. na.omit() on the whole table would also throw
# away rows that merely have gaps in columns the model never uses.
data <- data %>%
  arrange(Symbol, Date) %>%
  group_by(Symbol) %>%
  mutate(Profit = if_else(Adj.Close > dplyr::lag(Adj.Close), 1, 0)) %>%
  ungroup() %>%
  filter(!is.na(Profit)) %>%
  as.data.frame()

# NOTE(review): the outputs recorded in this document (class counts, model
# summary, coefficient table) were produced with the earlier cross-symbol
# comparison and need to be regenerated.

# Check distribution of the binary variable
table(data$Profit)
## 
##      0      1 
## 147250 158763

Building the Logistic Regression Model

# Fit a binomial GLM of Profit on trading volume, sector and market
# capitalisation (three explanatory variables)
model <- glm(
  formula = Profit ~ Volume + Sector + Marketcap,
  family = binomial,
  data = data
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the coefficient table, deviances and AIC of the fitted model
summary(object = model)
## 
## Call:
## glm(formula = Profit ~ Volume + Sector + Marketcap, family = binomial, 
##     data = data)
## 
## Coefficients:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   1.749e+00  3.286e-02   53.23   <2e-16 ***
## Volume                       -4.779e-08  5.803e-10  -82.35   <2e-16 ***
## SectorCommunication Services -5.168e+00  5.569e-02  -92.82   <2e-16 ***
## SectorConsumer Cyclical      -2.025e+00  3.428e-02  -59.08   <2e-16 ***
## SectorConsumer Defensive     -1.415e+00  3.609e-02  -39.19   <2e-16 ***
## SectorEnergy                 -1.917e+00  4.066e-02  -47.16   <2e-16 ***
## SectorFinancial Services     -1.875e+00  3.449e-02  -54.35   <2e-16 ***
## SectorHealthcare             -7.771e-01  3.441e-02  -22.58   <2e-16 ***
## SectorIndustrials            -2.232e+00  3.473e-02  -64.26   <2e-16 ***
## SectorReal Estate            -1.558e+00  3.570e-02  -43.64   <2e-16 ***
## SectorTechnology             -1.489e+00  3.426e-02  -43.45   <2e-16 ***
## SectorUtilities              -1.400e+00  3.578e-02  -39.12   <2e-16 ***
## Marketcap                     1.953e-12  2.364e-14   82.60   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 423791  on 306012  degrees of freedom
## Residual deviance: 384362  on 306000  degrees of freedom
## AIC: 384388
## 
## Number of Fisher Scoring iterations: 6

Building a Confidence Interval

# Coefficient table (estimate, standard error, z value, p-value) for each term
coefficients <- coef(summary(model))
coefficients
##                                   Estimate   Std. Error   z value      Pr(>|z|)
## (Intercept)                   1.749200e+00 3.286442e-02  53.22473  0.000000e+00
## Volume                       -4.778809e-08 5.802962e-10 -82.35120  0.000000e+00
## SectorCommunication Services -5.168468e+00 5.568520e-02 -92.81583  0.000000e+00
## SectorConsumer Cyclical      -2.025390e+00 3.427985e-02 -59.08398  0.000000e+00
## SectorConsumer Defensive     -1.414653e+00 3.609385e-02 -39.19375  0.000000e+00
## SectorEnergy                 -1.917357e+00 4.065965e-02 -47.15627  0.000000e+00
## SectorFinancial Services     -1.874775e+00 3.449349e-02 -54.35156  0.000000e+00
## SectorHealthcare             -7.771192e-01 3.441175e-02 -22.58296 6.373448e-113
## SectorIndustrials            -2.231867e+00 3.472993e-02 -64.26352  0.000000e+00
## SectorReal Estate            -1.557737e+00 3.569590e-02 -43.63909  0.000000e+00
## SectorTechnology             -1.488934e+00 3.426420e-02 -43.45451  0.000000e+00
## SectorUtilities              -1.399544e+00 3.577622e-02 -39.11941  0.000000e+00
## Marketcap                     1.952587e-12 2.363962e-14  82.59808  0.000000e+00

# 95% Wald confidence intervals on the log-odds scale (estimate +/- z * SE).
# confint.default() is used instead of profile-likelihood intervals, which
# require refitting the model repeatedly.
conf_int <- confint.default(model, level = 0.95)
conf_int

# The same estimates and interval bounds expressed as odds ratios
exp(cbind(OddsRatio = coef(model), conf_int))

Insights and Further Investigation

# Summarise the fit with values read from the model object, so the text always
# matches the model that was actually estimated rather than fixed template
# placeholders.
volume_coef <- coef(model)[["Volume"]]

# Overall fit: how far the predictors move the deviance away from the null model
cat(sprintf(
  "The logistic regression model indicates that the predictors lower the deviance from %.0f (null) to %.0f (residual), with AIC %.0f.\n",
  model$null.deviance, model$deviance, AIC(model)
))

# exp(coefficient) is the multiplicative change in the odds per unit of Volume;
# the per-million figure is given because a single unit is a negligible change
cat(sprintf(
  "The coefficient for Volume (%.3e) suggests that for each unit increase in volume, the odds of Profit = 1 are multiplied by %.10f (%.4f per additional one million units).\n",
  volume_coef, exp(volume_coef), exp(1e6 * volume_coef)
))

# Revenuegrowth and Ebitda are columns of `data` that the model does not use
cat(
  "Further analysis could explore additional variables available in the data",
  "(for example Revenuegrowth or Ebitda) and rescaling Volume and Marketcap,",
  "whose raw units lead to very small coefficients.\n"
)