#I'll use a logistic regression model with NASDAQ yahoo finance data to predict whether a credit card holder will default on their payments based on their credit balance and income.

#I chose Credit Risk Assessment using Logistic Regression
#Step 1: Load Required Packages and Data
#We'll use the NASDAQ dataset from the ISLR package, which contains information about credit card holders including their balance, income, default status, and student status.
# Install and load necessary packages
library(ISLR)
library(dplyr)   # For data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2) # For plotting

# Check current working directory
print(getwd())
## [1] "/cloud/project"
# List files in the current directory
print(list.files())
## [1] "^IXIC.csv"               "project.Rproj"          
## [3] "softare final part2.Rmd" "softare-final-part2.Rmd"
# Read the dataset
X_IXIC <- read.csv("^IXIC.csv")

# Check the structure and first few rows of the dataset
str(X_IXIC)
## 'data.frame':    253 obs. of  7 variables:
##  $ Date     : chr  "2023-06-13" "2023-06-14" "2023-06-15" "2023-06-16" ...
##  $ Open     : num  13567 13571 13573 13859 13642 ...
##  $ High     : num  13594 13662 13828 13864 13711 ...
##  $ Low      : num  13473 13456 13561 13681 13562 ...
##  $ Close    : num  13573 13626 13783 13690 13667 ...
##  $ Adj.Close: num  13573 13626 13783 13690 13667 ...
##  $ Volume   : num  5.52e+09 5.77e+09 5.67e+09 8.08e+09 5.24e+09 ...
head(X_IXIC)
##         Date     Open     High      Low    Close Adj.Close     Volume
## 1 2023-06-13 13566.53 13594.40 13473.19 13573.32  13573.32 5522100000
## 2 2023-06-14 13570.56 13661.74 13455.99 13626.48  13626.48 5772550000
## 3 2023-06-15 13572.88 13828.17 13561.37 13782.82  13782.82 5667520000
## 4 2023-06-16 13859.07 13864.06 13680.95 13689.57  13689.57 8076530000
## 5 2023-06-20 13642.29 13711.18 13561.84 13667.29  13667.29 5237710000
## 6 2023-06-21 13620.87 13638.57 13460.94 13502.20  13502.20 5194640000
# Create the Difference and movement variables
X_IXIC <- X_IXIC %>%
  mutate(Difference = Close - Open,
         movement = ifelse(Difference >= 0, "Up", "Down"))

# Convert movement to a factor
X_IXIC$movement <- factor(X_IXIC$movement, levels = c("Down", "Up"))

# Split data into training and testing sets
set.seed(123) # Set seed for reproducibility
train_index <- sample(nrow(X_IXIC), 0.8 * nrow(X_IXIC))
train_data <- X_IXIC[train_index, ]
test_data <- X_IXIC[-train_index, ]

# Fit logistic regression model
model <- glm(movement ~ Difference, data = train_data, family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(model)
## 
## Call:
## glm(formula = movement ~ Difference, family = "binomial", data = train_data)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   -11.52     925.39  -0.012    0.990
## Difference     15.26     705.60   0.022    0.983
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2.7614e+02  on 201  degrees of freedom
## Residual deviance: 1.6288e-06  on 200  degrees of freedom
## AIC: 4
## 
## Number of Fisher Scoring iterations: 25
# Predict on test data
probabilities <- predict(model, newdata = test_data, type = "response")
predictions <- ifelse(probabilities > 0.5, "Up", "Down")

# Create confusion matrix
conf_matrix <- table(Predicted = predictions, Actual = test_data$movement)
print(conf_matrix)
##          Actual
## Predicted Down Up
##      Down   23  0
##      Up      0 28
# Calculate accuracy
accuracy <- mean(predictions == test_data$movement)
cat("Accuracy:", accuracy, "\n")
## Accuracy: 1

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.