# Install the packages this script loads, but only when they are not already
# available, so re-running the script does not reinstall them every time.
# String literals must use plain ASCII double quotes; typographic quotes are
# a parse error in R.
if (!requireNamespace("jsonlite", quietly = TRUE)) {
  install.packages("jsonlite")
}
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
# Load the built-in `cars` dataset and report the median of its first column.
data("cars")
median(cars[[1L]])
## [1] 15
# Download daily BTC/USD price history as JSON and keep the table of rows.
library(jsonlite)
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=99"
btc_data <- fromJSON(url)
# The per-day rows are nested two levels down in the parsed response.
btc_prices <- btc_data$Data$Data
# Fail loudly if the response does not carry the expected table; otherwise
# btc_prices would be NULL (or empty) and later summaries such as max() would
# quietly return -Inf with only a warning.
if (!is.data.frame(btc_prices) || nrow(btc_prices) == 0L) {
  stop(
    "Unexpected response from ", url, ": no price rows under Data$Data",
    call. = FALSE
  )
}
head(btc_prices)
## time high low open volumefrom volumeto close
## 1 1763769600 85562.12 83462.76 85087.62 23440.30 1978095093 84696.98
## 2 1763856000 88106.69 84639.73 84696.98 27848.78 2411303413 86823.45
## 3 1763942400 89227.26 85242.01 86823.45 44709.41 3899541726 88288.33
## 4 1764028800 88494.81 86089.70 88288.33 32065.82 2799155557 87340.82
## 5 1764115200 90634.17 86298.47 87340.82 30484.49 2694221351 90487.28
## 6 1764201600 91934.77 90083.46 90487.28 21381.51 1949917850 91327.26
## conversionType conversionSymbol
## 1 direct
## 2 direct
## 3 direct
## 4 direct
## 5 direct
## 6 direct
# Highest value found in the `close` column of the downloaded price table.
max_close <- max(btc_prices[["close"]])
print(max_close)
## [1] 96945.09
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ purrr::flatten() masks jsonlite::flatten()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the loan dataset from the working directory and preview its first rows.
# NOTE(review): with read.csv() defaults, blank fields in character columns
# are kept as "" rather than NA (Education contains "" values), so NA-based
# checks and na.omit() will not see them -- confirm whether that is intended.
loan_data <- read.csv(file = "loan_risk_prediction_dataset.csv")
head(loan_data, n = 6L)
## Age Income LoanAmount CreditScore YearsExperience Gender Education
## 1 56 48353 31258 675 20 Female High School
## 2 69 57462 23262 586 6 Male High School
## 3 46 44219 26530 781 26 Male PhD
## 4 32 56307 11531 549 11 Male
## 5 60 37034 27871 500 19 Female High School
## 6 25 47886 18106 835 13 Male Masters
## City EmploymentType LoanApproved
## 1 Houston Unemployed 0
## 2 San Francisco Self-Employed 0
## 3 Houston Self-Employed 1
## 4 New York Unemployed 0
## 5 Chicago Unemployed 0
## 6 New York Salaried 1
# Compact overview of loan_data: dimensions plus each column's type and
# first few values.
str(object = loan_data)
## 'data.frame': 5000 obs. of 10 variables:
## $ Age : int 56 69 46 32 60 25 38 56 36 40 ...
## $ Income : num 48353 57462 44219 56307 37034 ...
## $ LoanAmount : num 31258 23262 26530 11531 27871 ...
## $ CreditScore : num 675 586 781 549 500 835 760 599 777 382 ...
## $ YearsExperience: int 20 6 26 11 19 13 9 22 29 30 ...
## $ Gender : chr "Female" "Male" "Male" "Male" ...
## $ Education : chr "High School" "High School" "PhD" "" ...
## $ City : chr "Houston" "San Francisco" "Houston" "New York" ...
## $ EmploymentType : chr "Unemployed" "Self-Employed" "Self-Employed" "Unemployed" ...
## $ LoanApproved : int 0 0 1 0 0 1 1 0 0 0 ...
# Per-column summary of loan_data (quantiles/mean for numeric columns,
# length/class/mode for character columns).
summary(object = loan_data)
## Age Income LoanAmount CreditScore
## Min. :18.00 Min. :-3731 Min. :-10059 Min. :300.0
## 1st Qu.:31.00 1st Qu.:39608 1st Qu.: 14455 1st Qu.:433.0
## Median :43.00 Median :49488 Median : 19842 Median :579.0
## Mean :43.58 Mean :49738 Mean : 19871 Mean :575.5
## 3rd Qu.:56.00 3rd Qu.:59917 3rd Qu.: 25327 3rd Qu.:712.0
## Max. :69.00 Max. :99146 Max. : 48353 Max. :849.0
## NA's :196 NA's :194
## YearsExperience Gender Education City
## Min. : 0.0 Length:5000 Length:5000 Length:5000
## 1st Qu.:10.0 Class :character Class :character Class :character
## Median :20.0 Mode :character Mode :character Mode :character
## Mean :19.6
## 3rd Qu.:29.0
## Max. :39.0
##
## EmploymentType LoanApproved
## Length:5000 Min. :0.0000
## Class :character 1st Qu.:0.0000
## Mode :character Median :0.0000
## Mean :0.2302
## 3rd Qu.:0.0000
## Max. :1.0000
##
# Number of NA values in each column of loan_data, as a named numeric vector.
vapply(loan_data, function(column) sum(is.na(column)), numeric(1))
## Age Income LoanAmount CreditScore YearsExperience
## 0 196 0 194 0
## Gender Education City EmploymentType LoanApproved
## 0 0 0 0 0
# Keep only the rows that have no NA in any column, then count how many of
# the remaining rows fall under each LoanApproved value.
loan_data <- na.omit(object = loan_data)
table(loan_data[["LoanApproved"]])
##
## 0 1
## 3521 1100
# Same LoanApproved counts expressed as proportions of all rows.
prop.table(x = table(loan_data[["LoanApproved"]]))
##
## 0 1
## 0.7619563 0.2380437
# Flag which columns of loan_data are numeric (integer or double).
# vapply() is used instead of sapply() so the result is guaranteed to be a
# logical vector; sapply() returns an empty list when there are no columns.
vapply(loan_data, is.numeric, logical(1))
## Age Income LoanAmount CreditScore YearsExperience
## TRUE TRUE TRUE TRUE TRUE
## Gender Education City EmploymentType LoanApproved
## FALSE FALSE FALSE FALSE TRUE
# Mean Income within each LoanApproved group.
aggregate(Income ~ LoanApproved, data = loan_data, FUN = mean)
## LoanApproved Income
## 1 0 47997.98
## 2 1 55072.87
# Mean CreditScore within each LoanApproved group.
aggregate(CreditScore ~ LoanApproved, data = loan_data, FUN = mean)
## LoanApproved CreditScore
## 1 0 534.6515
## 2 1 708.7355
# Mean LoanAmount within each LoanApproved group.
aggregate(LoanAmount ~ LoanApproved, data = loan_data, FUN = mean)
## LoanApproved LoanAmount
## 1 0 19802.15
## 2 1 20185.48
# Pairwise correlations between the numeric columns of loan_data.
# Columns are selected list-style (single-bracket, no comma) so the result is
# always a data frame; `loan_data[, mask]` collapses to a bare vector when
# only one column is numeric, which cor() cannot handle on its own.
# vapply() guarantees the column mask is a logical vector.
numeric_data <- loan_data[vapply(loan_data, is.numeric, logical(1))]
cor(numeric_data)
## Age Income LoanAmount CreditScore
## Age 1.000000000 -0.004988596 -0.013102997 0.002920979
## Income -0.004988596 1.000000000 0.001155858 -0.049995094
## LoanAmount -0.013102997 0.001155858 1.000000000 0.003068258
## CreditScore 0.002920979 -0.049995094 0.003068258 1.000000000
## YearsExperience -0.015283007 -0.015803062 0.007852464 -0.001731739
## LoanApproved -0.011412881 0.199620003 0.020367262 0.461309176
## YearsExperience LoanApproved
## Age -0.015283007 -0.011412881
## Income -0.015803062 0.199620003
## LoanAmount 0.007852464 0.020367262
## CreditScore -0.001731739 0.461309176
## YearsExperience 1.000000000 -0.006802128
## LoanApproved -0.006802128 1.000000000