# Chunk option: echo each chunk's source code in the knitted report.
knitr::opts_chunk$set(echo = TRUE)

# Read the data file loanapp.csv from GitHub.
# read.csv() is used instead of read.table(sep = ",") because its defaults are
# CSV-aware: only the double quote is a quote character and "#" is not treated
# as a comment marker, so apostrophes or hashes in a field cannot corrupt rows.
LoanData <- read.csv(
  file = "https://raw.githubusercontent.com/BeshkiaKvarnstrom/MSDS-Repos/main/loanapp.csv",
  header = TRUE
)
# Question 1. Use the summary function to gain an overview of the data set
summary(LoanData)
##        X             occ           loanamt          action           msa      
##  Min.   :   1   Min.   :1.000   Min.   :  2.0   Min.   :1.000   Min.   :1120  
##  1st Qu.: 498   1st Qu.:1.000   1st Qu.:100.0   1st Qu.:1.000   1st Qu.:1120  
##  Median : 995   Median :1.000   Median :126.0   Median :1.000   Median :1120  
##  Mean   : 995   Mean   :1.032   Mean   :143.2   Mean   :1.276   Mean   :1120  
##  3rd Qu.:1492   3rd Qu.:1.000   3rd Qu.:165.0   3rd Qu.:1.000   3rd Qu.:1120  
##  Max.   :1989   Max.   :3.000   Max.   :980.0   Max.   :3.000   Max.   :1120  
##                                                                               
##     suffolk           appinc           typur            unit      
##  Min.   :0.0000   Min.   :  0.00   Min.   :0.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.: 48.00   1st Qu.:0.000   1st Qu.:1.000  
##  Median :0.0000   Median : 64.00   Median :0.000   Median :1.000  
##  Mean   :0.1543   Mean   : 84.68   Mean   :1.531   Mean   :1.122  
##  3rd Qu.:0.0000   3rd Qu.: 88.00   3rd Qu.:1.000   3rd Qu.:1.000  
##  Max.   :1.0000   Max.   :972.00   Max.   :9.000   Max.   :4.000  
##                                                    NA's   :4      
##     married            dep              emp              yjob      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :1.0000   Median :0.0000   Median :0.0000   Median :0.000  
##  Mean   :0.6586   Mean   :0.7709   Mean   :0.2097   Mean   :0.449  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.000  
##  Max.   :1.0000   Max.   :8.0000   Max.   :9.0000   Max.   :9.000  
##  NA's   :3        NA's   :3                                        
##       self           atotinc         cototinc          hexp      
##  Min.   :0.0000   Min.   :    0   Min.   :    0   Min.   :  154  
##  1st Qu.:0.0000   1st Qu.: 2876   1st Qu.:    0   1st Qu.: 1054  
##  Median :0.0000   Median : 3813   Median : 1145   Median : 1317  
##  Mean   :0.1292   Mean   : 5196   Mean   : 1547   Mean   : 1505  
##  3rd Qu.:0.0000   3rd Qu.: 5596   3rd Qu.: 2417   3rd Qu.: 1715  
##  Max.   :1.0000   Max.   :81000   Max.   :41667   Max.   :10798  
##                                                                  
##      price            other              liq               rep       
##  Min.   :  25.0   Min.   :   0.00   Min.   :      0   Min.   :0.000  
##  1st Qu.: 129.0   1st Qu.:   0.00   1st Qu.:     20   1st Qu.:1.000  
##  Median : 163.0   Median :   0.00   Median :     38   Median :1.000  
##  Mean   : 196.3   Mean   :   2.37   Mean   :   4618   Mean   :1.503  
##  3rd Qu.: 225.0   3rd Qu.:   0.00   3rd Qu.:     83   3rd Qu.:2.000  
##  Max.   :1535.0   Max.   :1020.00   Max.   :1000000   Max.   :9.000  
##                                                       NA's   :9      
##      gdlin             lines              mortg            cons     
##  Min.   :  0.000   Min.   :     0.0   Min.   :1.000   Min.   :1.00  
##  1st Qu.:  1.000   1st Qu.:     7.0   1st Qu.:1.000   1st Qu.:1.00  
##  Median :  1.000   Median :    12.0   Median :2.000   Median :1.00  
##  Mean   :  1.583   Mean   :   516.4   Mean   :1.708   Mean   :2.11  
##  3rd Qu.:  1.000   3rd Qu.:    19.0   3rd Qu.:2.000   3rd Qu.:2.00  
##  Max.   :666.000   Max.   :999999.4   Max.   :4.000   Max.   :6.00  
##                                                                     
##      pubrec             hrat           obrat           fixadj      
##  Min.   :0.00000   Min.   : 1.00   Min.   : 0.00   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:21.00   1st Qu.:28.00   1st Qu.:0.0000  
##  Median :0.00000   Median :25.77   Median :33.00   Median :0.0000  
##  Mean   :0.06888   Mean   :24.79   Mean   :32.39   Mean   :0.3082  
##  3rd Qu.:0.00000   3rd Qu.:29.00   3rd Qu.:37.00   3rd Qu.:1.0000  
##  Max.   :1.00000   Max.   :72.00   Max.   :95.00   Max.   :1.0000  
##                                                                    
##       term             apr              prop            inss       
##  Min.   :     6   Min.   :  25.0   Min.   :1.000   Min.   :0.0000  
##  1st Qu.:   360   1st Qu.: 135.0   1st Qu.:2.000   1st Qu.:0.0000  
##  Median :   360   Median : 169.0   Median :2.000   Median :0.0000  
##  Mean   :  2352   Mean   : 205.1   Mean   :1.861   Mean   :0.2001  
##  3rd Qu.:   360   3rd Qu.: 230.0   3rd Qu.:2.000   3rd Qu.:0.0000  
##  Max.   :999999   Max.   :4316.0   Max.   :3.000   Max.   :1.0000  
##                                                                    
##      inson              gift            cosign            unver        
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.00000   Median :0.00000  
##  Mean   :0.01508   Mean   :0.1599   Mean   :0.02866   Mean   :0.04274  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.00000   Max.   :1.00000  
##                                                                        
##      review           netw              unem            min30        
##  Min.   :  0.0   Min.   :-7919.0   Min.   : 1.800   Min.   :0.00000  
##  1st Qu.:  1.0   1st Qu.:   43.0   1st Qu.: 3.100   1st Qu.:0.00000  
##  Median :  2.0   Median :   95.0   Median : 3.200   Median :0.00000  
##  Mean   :113.7   Mean   :  266.6   Mean   : 3.882   Mean   :0.05703  
##  3rd Qu.:  3.0   3rd Qu.:  229.6   3rd Qu.: 3.900   3rd Qu.:0.00000  
##  Max.   :999.0   Max.   :28023.0   Max.   :10.600   Max.   :1.00000  
##                                                     NA's   :183      
##        bd               mi              old               vr        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.4208   Mean   :0.8728   Mean   :0.4676   Mean   :0.4098  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##                                                                     
##       sch             black             hispan             male       
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:1.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:1.0000  
##  Median :1.0000   Median :0.00000   Median :0.00000   Median :1.0000  
##  Mean   :0.7717   Mean   :0.09904   Mean   :0.05581   Mean   :0.8131  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.00000   Max.   :1.0000  
##                                                       NA's   :15      
##      reject          approve           mortno          mortperf     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000   Median :0.0000   Median :1.0000  
##  Mean   :0.1227   Mean   :0.8773   Mean   :0.3318   Mean   :0.6385  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##                                                                     
##     mortlat1          mortlat2           chist            multi        
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:1.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :1.0000   Median :0.00000  
##  Mean   :0.01911   Mean   :0.01056   Mean   :0.8376   Mean   :0.08615  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.0000   Max.   :1.00000  
##                                                       NA's   :4        
##     loanprc            thick            white       
##  Min.   :0.02105   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.70000   1st Qu.:0.0000   1st Qu.:1.0000  
##  Median :0.80000   Median :0.0000   Median :1.0000  
##  Mean   :0.77064   Mean   :0.1051   Mean   :0.8451  
##  3rd Qu.:0.89894   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :2.57143   Max.   :1.0000   Max.   :1.0000  
##                    NA's   :9
# Question 1. Display the mean and median for at least two attributes.
# Columns to summarise; kept in one place so the mean and median rows always
# describe the same attributes.
overall_cols <- c("loanamt", "netw", "loanprc")
# vapply() (rather than sapply()) guarantees one numeric value per column and
# fails loudly if a column ever yields anything else.
MeanLoan <- vapply(LoanData[overall_cols], mean, FUN.VALUE = numeric(1))
MedianLoan <- vapply(LoanData[overall_cols], median, FUN.VALUE = numeric(1))
# rbind() takes the row labels from the variable names, so the resulting data
# frame has rows "MeanLoan" and "MedianLoan" and one column per attribute.
MeanMedianLoanDF <- data.frame(rbind(MeanLoan, MedianLoan))
print(MeanMedianLoanDF)
##             loanamt     netw   loanprc
## MeanLoan   143.2453 266.5691 0.7706397
## MedianLoan 126.0000  95.0000 0.8000000
# Question 2. Create a new data frame with a subset of the columns and rows. Make sure to rename it.
# Keep loans above 300 (in loanamt units) with a term longer than 240.
# NOTE(review): summary(LoanData) reports a maximum term of 999999, which looks
# like a missing-value code; such rows also satisfy term > 240 -- confirm
# whether they should be excluded.
LoanData_sub <- subset(LoanData, loanamt > 300 & term > 240)
# Keep six columns and at most the first 10 matching rows. head() is used
# instead of indexing with 1:10 so that, if fewer than 10 rows match, the
# result is simply shorter rather than padded with all-NA rows.
LoanData_sub <- head(
  LoanData_sub[, c("married", "loanamt", "price", "liq", "netw", "loanprc")],
  n = 10
)
head(LoanData_sub)
##    married loanamt price    liq   netw   loanprc
## 19       1     349   387   57.0  598.0 0.9018088
## 45       1     315   355   42.9   66.7 0.8873239
## 47       1     632   790  183.0 1427.0 0.8000000
## 61       1     310   457  154.0  193.0 0.6783370
## 89       1     732   975 1213.0 1550.0 0.7507693
## 99       0     320   630  421.2 1051.2 0.5079365
# Question 3. Create new column names for the new data frame.
# The labels are assigned by position, in the same order as the columns that
# were selected when LoanData_sub was built.
display_names <- c(
  "Marital Status",
  "Loan Amount",
  "Loan Price",
  "Liquid",
  "Networth",
  "Loan PRC"
)
names(LoanData_sub) <- display_names
head(LoanData_sub, n = 6L)
##    Marital Status Loan Amount Loan Price Liquid Networth  Loan PRC
## 19              1         349        387   57.0    598.0 0.9018088
## 45              1         315        355   42.9     66.7 0.8873239
## 47              1         632        790  183.0   1427.0 0.8000000
## 61              1         310        457  154.0    193.0 0.6783370
## 89              1         732        975 1213.0   1550.0 0.7507693
## 99              0         320        630  421.2   1051.2 0.5079365
# Question 4. Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.

# Per-column summary statistics for the subset data frame.
summary(LoanData_sub)
##  Marital Status  Loan Amount      Loan Price        Liquid      
##  Min.   :0.0    Min.   :304.0   Min.   :355.0   Min.   :  42.9  
##  1st Qu.:1.0    1st Qu.:312.8   1st Qu.:387.8   1st Qu.:  75.0  
##  Median :1.0    Median :334.5   Median :538.5   Median : 168.5  
##  Mean   :0.9    Mean   :417.4   Mean   :563.4   Mean   : 324.9  
##  3rd Qu.:1.0    3rd Qu.:450.0   3rd Qu.:645.0   3rd Qu.: 407.1  
##  Max.   :1.0    Max.   :732.0   Max.   :975.0   Max.   :1213.0  
##     Networth         Loan PRC     
##  Min.   :  66.7   Min.   :0.5079  
##  1st Qu.: 366.2   1st Qu.:0.7007  
##  Median : 746.7   Median :0.7754  
##  Mean   : 754.1   Mean   :0.7544  
##  3rd Qu.:1006.7   3rd Qu.:0.8000  
##  Max.   :1550.0   Max.   :0.9018
# Mean and median of the same three attributes, now taken from the subset.
# The columns are addressed by their renamed (display) labels.
subset_cols <- c("Loan Amount", "Networth", "Loan PRC")
# vapply() (rather than sapply()) guarantees one numeric value per column and
# fails loudly if a column ever yields anything else.
MeanLoan <- vapply(LoanData_sub[subset_cols], mean, FUN.VALUE = numeric(1))
MedianLoan <- vapply(LoanData_sub[subset_cols], median, FUN.VALUE = numeric(1))
# data.frame() makes the column names syntactic, so "Loan Amount" is shown as
# "Loan.Amount" in the printed table; the row labels come from the variable
# names passed to rbind().
MeanMedianLoanSubDF <- data.frame(rbind(MeanLoan, MedianLoan))
print(MeanMedianLoanSubDF)
##            Loan.Amount Networth  Loan.PRC
## MeanLoan         417.4  754.131 0.7544290
## MedianLoan       334.5  746.655 0.7753846
# Compare the full data set with the subset: element-wise difference
# (full data minus subset) of the two mean/median tables. The result keeps the
# column names of the first table.
compare_mn_mdn <- MeanMedianLoanDF - MeanMedianLoanSubDF
# Tag every column so it is clear the values are differences.
colnames(compare_mn_mdn) <- paste0(colnames(compare_mn_mdn), "_Diff")
print(compare_mn_mdn)
##            loanamt_Diff netw_Diff loanprc_Diff
## MeanLoan      -274.1547 -487.5619   0.01621070
## MedianLoan    -208.5000 -651.6550   0.02461538
# Question 5. For at least 3 values in a column please rename so that every value in that column is renamed. For example, suppose I have 20 values of the letter “e” in one column. Rename those values so that all 20 would show as “excellent”.
# Recode the marital status codes to labels: 0 -> "Single", 1 -> "Married",
# missing -> "Divorced". Any other value is kept as its text representation.
marital_status <- as.character(LoanData_sub$`Marital Status`)
# Missing values must be detected with is.na(); comparing against the string
# "NA" never matches a real NA, so that recode would silently do nothing.
status_missing <- is.na(marital_status)
marital_status[marital_status %in% "0"] <- "Single"
marital_status[marital_status %in% "1"] <- "Married"
# NOTE(review): labelling missing values as "Divorced" follows the original
# intent of this exercise; confirm it is appropriate for real analysis.
marital_status[status_missing] <- "Divorced"
LoanData_sub$`Marital Status` <- marital_status

head(LoanData_sub)
##    Marital Status Loan Amount Loan Price Liquid Networth  Loan PRC
## 19        Married         349        387   57.0    598.0 0.9018088
## 45        Married         315        355   42.9     66.7 0.8873239
## 47        Married         632        790  183.0   1427.0 0.8000000
## 61        Married         310        457  154.0    193.0 0.6783370
## 89        Married         732        975 1213.0   1550.0 0.7507693
## 99         Single         320        630  421.2   1051.2 0.5079365
# Question 6. Display enough rows to see examples of all of steps 1-5 above.
# Request up to 10 rows instead of head()'s default of 6.
head(LoanData_sub, 10)
##     Marital Status Loan Amount Loan Price Liquid Networth  Loan PRC
## 19         Married         349        387   57.0   598.00 0.9018088
## 45         Married         315        355   42.9    66.70 0.8873239
## 47         Married         632        790  183.0  1427.00 0.8000000
## 61         Married         310        457  154.0   193.00 0.6783370
## 89         Married         732        975 1213.0  1550.00 0.7507693
## 99          Single         320        630  421.2  1051.20 0.5079365
## 117        Married         450        650  365.0   873.10 0.6923077
## 203        Married         450        620   60.0   641.00 0.7258065
## 217        Married         304        380  632.6   852.31 0.8000000
## 232        Married         312        390  120.0   289.00 0.8000000
# Question 7. BONUS – place the original .csv in a GitHub file and have R read from the link.
# This will be a very useful skill as you progress in your data science education and career.
# Raw-content URL of the CSV, kept in a variable so the read call stays short.
LoanDatGit <- "https://raw.githubusercontent.com/BeshkiaKvarnstrom/MSDS-Repos/main/loanapp.csv"
# Download and parse the file; the first line supplies the column names.
ReadGithub <- read.csv(file = LoanDatGit, header = TRUE)
# Show the first six rows to confirm the file was read.
head(ReadGithub, n = 6L)
##   X occ loanamt action  msa suffolk appinc typur unit married dep emp yjob self
## 1 1   1      89      1 1120       0     72     0    1       0   0   0    0    0
## 2 2   1     128      3 1120       0     74     0    1       1   1   0    0    0
## 3 3   1     128      1 1120       0     84     3    1       0   0   1    1    0
## 4 4   1      66      1 1120       0     36     0    1       1   0   0    0    1
## 5 5   1     120      1 1120       0     59     8    1       1   0   0    0    0
## 6 6   1     111      1 1120       0     63     9    1       0   0   0    0    0
##   atotinc cototinc hexp price other   liq rep gdlin lines mortg cons pubrec
## 1    5849        0 1031   118     0  34.5   1     1    15     2    1      0
## 2    4583     1508 1391   160     0  52.0   3     1    19     2    2      0
## 3    2666     4416 1371   143     0  37.0   6     1    18     2    2      0
## 4    3000        0  839   110     0  19.0   1     1    25     2    6      1
## 5    2583     2358 1341   134     0  31.0   1     1    15     2    1      0
## 6    2208     2959 1122   138     0 169.0   2     1    10     2    6      0
##    hrat obrat fixadj term apr prop inss inson gift cosign unver review  netw
## 1 17.63  34.5      0  360 118    1    0     0    0      0     0      1  99.6
## 2 22.54  34.1      1  360 175    2    0     0    0      0     0    999 847.0
## 3 19.00  26.0      0  180 145    2    0     0    0      0     0      3  40.0
## 4 24.00  37.0      0  360 110    2    0     0    1      0     0      2 158.0
## 5 25.10  32.1      0  360 135    1    1     0    0      0     0      2  69.0
## 6 21.00  33.0      0  360 144    2    0     0    0      0     0      1 262.0
##   unem min30 bd mi old vr sch black hispan male reject approve mortno mortperf
## 1  3.2     0  0  1   0  1   1     0      0   NA      0       1      0        1
## 2  3.2     0  0  1   0  1   1     0      0    1      1       0      0        1
## 3  3.9     0  1  1   0  0   1     0      0    1      0       1      0        1
## 4  3.1     0  0  1   1  1   1     0      0    1      0       1      0        1
## 5  4.3     0  1  1   0  0   0     0      0    1      0       1      0        1
## 6  3.2     0  1  1   0  0   0     0      0    1      0       1      0        1
##   mortlat1 mortlat2 chist multi   loanprc thick white
## 1        0        0     1     0 0.7542373     0     1
## 2        0        0     1     0 0.8000000     1     1
## 3        0        0     1     0 0.8951049     1     1
## 4        0        0     0     0 0.6000000     0     1
## 5        0        0     1     0 0.8955224     0     1
## 6        0        0     0     0 0.8043478     0     1