##RBridge Week 2 Assignment
Importing .CSV file for data manipulation
Option 1. Opens a file-chooser dialog so you can select the file interactively.
# Option 1: let the user pick the CSV through the file-chooser dialog.
Credit_card_Alt_1 <- read.csv(file = file.choose())
Option 2. Use this when your working directory is set to the folder where the file is located. Option 2.5. Another option is to import the file by its full path; however, that requires knowing where the file is stored on the computer, and for privacy reasons I prefer not to show that path.
# Option 2: read the CSV by bare file name, which is looked up in the
# current working directory.
Credit_card_Alt_2 <- read.csv(file = "CreditCard.csv")
BONUS – Place the original .csv file in a GitHub repository and have R read it directly from the raw-file link.
# Bonus: load the data set straight from the raw GitHub URL.
# read.csv() is used, as in the other import options, so the CSV defaults
# apply (header row, comma separator, only `"` as quote, no comment
# character).
Credit_card <- read.csv(
  file = "https://raw.githubusercontent.com/Jlok17/Data-Science-Projects/main/CreditCard.csv"
)
# Per-column overview of the full data set.
summary(Credit_card)
## X card reports age
## Min. : 1.0 Length:1319 Min. : 0.0000 Min. : 0.1667
## 1st Qu.: 330.5 Class :character 1st Qu.: 0.0000 1st Qu.:25.4167
## Median : 660.0 Mode :character Median : 0.0000 Median :31.2500
## Mean : 660.0 Mean : 0.4564 Mean :33.2131
## 3rd Qu.: 989.5 3rd Qu.: 0.0000 3rd Qu.:39.4167
## Max. :1319.0 Max. :14.0000 Max. :83.5000
## income share expenditure owner
## Min. : 0.210 Min. :0.0001091 Min. : 0.000 Length:1319
## 1st Qu.: 2.244 1st Qu.:0.0023159 1st Qu.: 4.583 Class :character
## Median : 2.900 Median :0.0388272 Median : 101.298 Mode :character
## Mean : 3.365 Mean :0.0687322 Mean : 185.057
## 3rd Qu.: 4.000 3rd Qu.:0.0936168 3rd Qu.: 249.036
## Max. :13.500 Max. :0.9063205 Max. :3099.505
## selfemp dependents months majorcards
## Length:1319 Min. :0.0000 Min. : 0.00 Min. :0.0000
## Class :character 1st Qu.:0.0000 1st Qu.: 12.00 1st Qu.:1.0000
## Mode :character Median :1.0000 Median : 30.00 Median :1.0000
## Mean :0.9939 Mean : 55.27 Mean :0.8173
## 3rd Qu.:2.0000 3rd Qu.: 72.00 3rd Qu.:1.0000
## Max. :6.0000 Max. :540.00 Max. :1.0000
## active
## Min. : 0.000
## 1st Qu.: 2.000
## Median : 6.000
## Mean : 6.997
## 3rd Qu.:11.000
## Max. :46.000
# Means of dependents, age and income over the full data set.
print(paste("Mean for Dependents is", mean(Credit_card[["dependents"]])))
## [1] "Mean for Dependents is 0.99393479909022"
print(paste("Mean for Age is", mean(Credit_card[["age"]])))
## [1] "Mean for Age is 33.2131032549659"
print(paste("Mean for Income is", mean(Credit_card[["income"]])))
## [1] "Mean for Income is 3.36537604245641"
# Medians of the same three columns.
print(paste("Median for Dependents is", median(Credit_card[["dependents"]])))
## [1] "Median for Dependents is 1"
print(paste("Median for Age is", median(Credit_card[["age"]])))
## [1] "Median for Age is 31.25"
print(paste("Median for Income is", median(Credit_card[["income"]])))
## [1] "Median for Income is 2.9"
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Build the subset: keep only rows where expenditure > 10 AND months > 30
# (filter() combines comma-separated conditions with a logical AND).
# The target name is written first with `<-` so the assignment is visible
# at the start of the pipeline.
Credit_Card10 <- Credit_card %>%
  filter(expenditure > 10, months > 30)
# Preview the first rows of the subset.
head(Credit_Card10)
## X card reports age income share expenditure owner selfemp
## 1 1 yes 0 37.66667 4.5200 0.033269910 124.98330 yes no
## 2 3 yes 0 33.66667 4.5000 0.004155556 15.00000 yes no
## 3 5 yes 0 32.16667 9.7867 0.067050590 546.50330 yes no
## 4 6 yes 0 23.25000 2.5000 0.044438400 91.99667 no no
## 5 8 yes 0 29.16667 2.3700 0.076433760 150.79000 yes no
## 6 9 yes 0 37.00000 3.8000 0.245627900 777.82170 yes no
## dependents months majorcards active
## 1 3 54 1 12
## 2 4 58 1 5
## 3 2 64 1 5
## 4 0 54 1 1
## 5 0 77 1 3
## 6 0 97 1 6
# View() opens a data viewer, which may be unavailable when the script is
# run non-interactively (e.g. Rscript or knitting), so only call it in an
# interactive session.
if (interactive()) {
  View(Credit_Card10)
}
# Rename three columns: income -> Wage, selfemp -> Self_Employment,
# X -> Case_Num. All other columns keep their names.
Credit_Card10 <- Credit_Card10 %>%
  rename(Wage = income, Self_Employment = selfemp, Case_Num = X)
# Inspect the subset again after the rename (interactive sessions only).
if (interactive()) {
  View(Credit_Card10)
}
# Per-column overview of the renamed subset.
summary(Credit_Card10)
## Case_Num card reports age
## Min. : 1 Length:449 Min. :0.0000 Min. : 0.50
## 1st Qu.: 374 Class :character 1st Qu.:0.0000 1st Qu.:29.08
## Median : 734 Mode :character Median :0.0000 Median :36.92
## Mean : 683 Mean :0.1737 Mean :36.85
## 3rd Qu.:1000 3rd Qu.:0.0000 3rd Qu.:43.58
## Max. :1318 Max. :4.0000 Max. :83.50
## Wage share expenditure owner
## Min. : 0.210 Min. :0.002706 Min. : 10.29 Length:449
## 1st Qu.: 2.458 1st Qu.:0.031019 1st Qu.: 80.45 Class :character
## Median : 3.200 Median :0.057511 Median : 159.73 Mode :character
## Mean : 3.724 Mean :0.086918 Mean : 248.92
## 3rd Qu.: 4.500 3rd Qu.:0.108520 3rd Qu.: 323.95
## Max. :13.500 Max. :0.541860 Max. :3099.51
## Self_Employment dependents months majorcards
## Length:449 Min. :0.000 Min. : 31.0 Min. :0.0000
## Class :character 1st Qu.:0.000 1st Qu.: 48.0 1st Qu.:1.0000
## Mode :character Median :1.000 Median : 77.0 Median :1.0000
## Mean :1.163 Mean :101.4 Mean :0.8151
## 3rd Qu.:2.000 3rd Qu.:132.0 3rd Qu.:1.0000
## Max. :6.000 Max. :540.0 Max. :1.0000
## active
## Min. : 0.00
## 1st Qu.: 3.00
## Median : 7.00
## Mean : 8.08
## 3rd Qu.:12.00
## Max. :29.00
# Means of dependents, age and wage over the filtered subset.
print(paste("Mean for Dependents is", mean(Credit_Card10[["dependents"]])))
## [1] "Mean for Dependents is 1.16258351893096"
print(paste("Mean for Age is", mean(Credit_Card10[["age"]])))
## [1] "Mean for Age is 36.8544913659243"
print(paste("Mean for Wage is", mean(Credit_Card10[["Wage"]])))
## [1] "Mean for Wage is 3.72362138084633"
# Medians of the same three columns.
print(paste("Median for Dependents is", median(Credit_Card10[["dependents"]])))
## [1] "Median for Dependents is 1"
print(paste("Median for Age is", median(Credit_Card10[["age"]])))
## [1] "Median for Age is 36.91667"
print(paste("Median for Wage is", median(Credit_Card10[["Wage"]])))
## [1] "Median for Wage is 3.2"
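4 Explanation: The means for Age, Dependents, and Income in the original data set were 33.21, 0.99, and 3.37, respectively. By comparison, the means in the subset (where Income is renamed Wage) were 36.85, 1.16, and 3.72. The medians for Age, Dependents, and Income in the original data set were 31.25, 1, and 2.9, whereas in the subset they were 36.92, 1, and 3.2. In other words, all three means and the median age and income are higher in the subset, while the median number of dependents is unchanged.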
library(stringr)
# Recode the yes/no flag columns to single-letter codes.
# The patterns are anchored (^...$) so only whole values are rewritten, never
# a substring of some other value. Passing a named vector to
# str_replace_all() applies each pattern = replacement pair in turn.
yes_no_codes <- c("^yes$" = "Y", "^no$" = "N")
Credit_Card10$owner <- str_replace_all(Credit_Card10$owner, yes_no_codes)
# card gets the same two-way recode as owner, so a "no" value cannot be
# left behind next to recoded "Y" values.
Credit_Card10$card <- str_replace_all(Credit_Card10$card, yes_no_codes)

# majorcards holds 0/1 values; convert to text explicitly, then relabel
# 1 -> "Yes" and 0 -> "No" (anchored, so e.g. "10" would be left untouched).
Credit_Card10$majorcards <- str_replace_all(
  as.character(Credit_Card10$majorcards),
  c("^1$" = "Yes", "^0$" = "No")
)
# Preview the recoded subset.
head(Credit_Card10)
## Case_Num card reports age Wage share expenditure owner
## 1 1 Y 0 37.66667 4.5200 0.033269910 124.98330 Y
## 2 3 Y 0 33.66667 4.5000 0.004155556 15.00000 Y
## 3 5 Y 0 32.16667 9.7867 0.067050590 546.50330 Y
## 4 6 Y 0 23.25000 2.5000 0.044438400 91.99667 N
## 5 8 Y 0 29.16667 2.3700 0.076433760 150.79000 Y
## 6 9 Y 0 37.00000 3.8000 0.245627900 777.82170 Y
## Self_Employment dependents months majorcards active
## 1 no 3 54 Yes 12
## 2 no 4 58 Yes 5
## 3 no 2 64 Yes 5
## 4 no 0 54 Yes 1
## 5 no 0 77 Yes 3
## 6 no 0 97 Yes 6