# HW Week 2
# Location of the CollegeDistance data set (raw CSV hosted on GitHub).
url <- "https://raw.githubusercontent.com/jforster19/R_BRIDGE/main/CollegeDistance.csv"
# Bare file name of the data set; not used by the statements in this section.
link <- "CollegeDistance.csv"
# Commented-out URL of a different file (BankWages.csv) in the same repository:
# 'https://github.com/jforster19/R_BRIDGE/blob/f672d61bbab52ca600d3a00383c0a4176968e05f/BankWages.csv'

# Read the comma-separated file, using its first line as the column names,
# then show the first six rows.
df <- read.table(url, sep = ",", header = TRUE)
head(df)
## X gender ethnicity score fcollege mcollege home urban unemp wage distance
## 1 1 male other 39.15 yes no yes yes 6.2 8.09 0.2
## 2 2 female other 48.87 no no yes yes 6.2 8.09 0.2
## 3 3 male other 48.74 no no yes yes 6.2 8.09 0.2
## 4 4 male afam 40.40 no no yes yes 6.2 8.09 0.2
## 5 5 female other 40.48 no no no yes 5.6 8.09 0.4
## 6 6 male other 54.71 no no yes yes 5.6 8.09 0.4
## tuition education income region
## 1 0.88915 12 high other
## 2 0.88915 12 low other
## 3 0.88915 12 low other
## 4 0.88915 12 low other
## 5 0.88915 13 low other
## 6 0.88915 12 low other
# Print a per-column summary of the full data set: quartiles and mean for
# numeric columns, length/class/mode for character columns.
summary(df)
## X gender ethnicity score
## Min. : 1 Length:4739 Length:4739 Min. :28.95
## 1st Qu.: 1186 Class :character Class :character 1st Qu.:43.92
## Median : 2370 Mode :character Mode :character Median :51.19
## Mean : 3955 Mean :50.89
## 3rd Qu.: 3554 3rd Qu.:57.77
## Max. :37810 Max. :72.81
## fcollege mcollege home urban
## Length:4739 Length:4739 Length:4739 Length:4739
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## unemp wage distance tuition
## Min. : 1.400 Min. : 6.590 Min. : 0.000 Min. :0.2575
## 1st Qu.: 5.900 1st Qu.: 8.850 1st Qu.: 0.400 1st Qu.:0.4850
## Median : 7.100 Median : 9.680 Median : 1.000 Median :0.8245
## Mean : 7.597 Mean : 9.501 Mean : 1.803 Mean :0.8146
## 3rd Qu.: 8.900 3rd Qu.:10.150 3rd Qu.: 2.500 3rd Qu.:1.1270
## Max. :24.900 Max. :12.960 Max. :20.000 Max. :1.4042
## education income region
## Min. :12.00 Length:4739 Length:4739
## 1st Qu.:12.00 Class :character Class :character
## Median :13.00 Mode :character Mode :character
## Mean :13.81
## 3rd Qu.:16.00
## Max. :18.00
# Baseline averages over every row of df: distance, wage and score.
mean(df[["distance"]])
## [1] 1.80287
mean(df[["wage"]])
## [1] 9.500506
mean(df[["score"]])
## [1] 50.88903
# Build the high-tuition subset: rows whose tuition exceeds 1.127 (the rounded
# third quartile shown by summary(df)), restricted to six columns.
# Standard `[` indexing is used rather than subset(), which the R documentation
# describes as a convenience for interactive use. which() discards rows where
# the comparison is NA, matching what subset() does, and row names are kept.
new_df <- df[
  which(df$tuition > 1.127),
  c("gender", "score", "distance", "unemp", "wage", "tuition")
]
# Append a "_mod" suffix to every column name of the subset.
names(new_df) <- paste0(names(new_df), "_mod")
# Show the first six rows of the subset.
head(new_df)
## gender_mod score_mod distance_mod unemp_mod wage_mod tuition_mod
## 88 female 51.32 1.2 5.9 7.09 1.38568
## 89 female 36.37 1.2 5.9 7.09 1.38568
## 90 female 43.80 1.2 5.9 7.09 1.38568
## 91 female 45.14 1.2 5.9 7.09 1.38568
## 92 female 63.26 1.2 5.9 7.09 1.38568
## 93 female 51.32 1.2 5.9 7.09 1.38568
# Print a per-column summary of the high-tuition subset, for comparison with
# the summary of the full data set.
summary(new_df)
## gender_mod score_mod distance_mod unemp_mod
## Length:1246 Min. :32.52 Min. : 0.000 Min. : 2.50
## Class :character 1st Qu.:46.41 1st Qu.: 0.300 1st Qu.: 6.60
## Mode :character Median :53.29 Median : 0.800 Median : 8.00
## Mean :52.61 Mean : 1.298 Mean : 8.55
## 3rd Qu.:58.99 3rd Qu.: 1.500 3rd Qu.:10.20
## Max. :69.90 Max. :11.000 Max. :17.70
## wage_mod tuition_mod
## Min. : 7.09 Min. :1.127
## 1st Qu.: 9.64 1st Qu.:1.152
## Median : 9.96 Median :1.166
## Mean :10.27 Mean :1.223
## 3rd Qu.:11.62 3rd Qu.:1.248
## Max. :12.15 Max. :1.404
# Averages over the high-tuition subset: distance, wage and score.
mean(new_df[["distance_mod"]])
## [1] 1.297592
mean(new_df[["wage_mod"]])
## [1] 10.26884
mean(new_df[["score_mod"]])
## [1] 52.60662
# For the higher-tuition subset (tuition > 1.127), the average wage and score are higher, while the average distance from school is lower.
# Relabel the gender values in place: "male" becomes "men" and "female"
# becomes "women". Any other value is left untouched.
new_df$gender_mod[new_df$gender_mod == "male"] <- "men"
new_df$gender_mod[new_df$gender_mod == "female"] <- "women"
# Show the first six rows to confirm the new labels.
head(new_df)
## gender_mod score_mod distance_mod unemp_mod wage_mod tuition_mod
## 88 women 51.32 1.2 5.9 7.09 1.38568
## 89 women 36.37 1.2 5.9 7.09 1.38568
## 90 women 43.80 1.2 5.9 7.09 1.38568
## 91 women 45.14 1.2 5.9 7.09 1.38568
## 92 women 63.26 1.2 5.9 7.09 1.38568
## 93 women 51.32 1.2 5.9 7.09 1.38568