HW Week 2

# Location of the CollegeDistance CSV on GitHub (raw file endpoint).
url <- "https://raw.githubusercontent.com/jforster19/R_BRIDGE/main/CollegeDistance.csv"
# Bare file name of the data set; not referenced by the statements shown here.
link <- "CollegeDistance.csv"

# Unused, commented-out link to another CSV (BankWages) in the same repository:
#'https://github.com/jforster19/R_BRIDGE/blob/f672d61bbab52ca600d3a00383c0a4176968e05f/BankWages.csv'

# read.csv() uses CSV-appropriate defaults (quote = "\"", comment.char = ""),
# so an apostrophe or '#' inside a field cannot break parsing the way
# read.table()'s defaults (quote = "\"'", comment.char = "#") can.
df <- read.csv(file = url, header = TRUE)

# Preview the first six rows to confirm the columns were read as expected.
head(df)
##   X gender ethnicity score fcollege mcollege home urban unemp wage distance
## 1 1   male     other 39.15      yes       no  yes   yes   6.2 8.09      0.2
## 2 2 female     other 48.87       no       no  yes   yes   6.2 8.09      0.2
## 3 3   male     other 48.74       no       no  yes   yes   6.2 8.09      0.2
## 4 4   male      afam 40.40       no       no  yes   yes   6.2 8.09      0.2
## 5 5 female     other 40.48       no       no   no   yes   5.6 8.09      0.4
## 6 6   male     other 54.71       no       no  yes   yes   5.6 8.09      0.4
##   tuition education income region
## 1 0.88915        12   high  other
## 2 0.88915        12    low  other
## 3 0.88915        12    low  other
## 4 0.88915        12    low  other
## 5 0.88915        13    low  other
## 6 0.88915        12    low  other
# Per-column summary of the full data set: min/quartiles/mean/max for the
# numeric columns, and length/class/mode for the character columns.
summary(df)
##        X            gender           ethnicity             score      
##  Min.   :    1   Length:4739        Length:4739        Min.   :28.95  
##  1st Qu.: 1186   Class :character   Class :character   1st Qu.:43.92  
##  Median : 2370   Mode  :character   Mode  :character   Median :51.19  
##  Mean   : 3955                                         Mean   :50.89  
##  3rd Qu.: 3554                                         3rd Qu.:57.77  
##  Max.   :37810                                         Max.   :72.81  
##    fcollege           mcollege             home              urban          
##  Length:4739        Length:4739        Length:4739        Length:4739       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      unemp             wage           distance         tuition      
##  Min.   : 1.400   Min.   : 6.590   Min.   : 0.000   Min.   :0.2575  
##  1st Qu.: 5.900   1st Qu.: 8.850   1st Qu.: 0.400   1st Qu.:0.4850  
##  Median : 7.100   Median : 9.680   Median : 1.000   Median :0.8245  
##  Mean   : 7.597   Mean   : 9.501   Mean   : 1.803   Mean   :0.8146  
##  3rd Qu.: 8.900   3rd Qu.:10.150   3rd Qu.: 2.500   3rd Qu.:1.1270  
##  Max.   :24.900   Max.   :12.960   Max.   :20.000   Max.   :1.4042  
##    education        income             region         
##  Min.   :12.00   Length:4739        Length:4739       
##  1st Qu.:12.00   Class :character   Class :character  
##  Median :13.00   Mode  :character   Mode  :character  
##  Mean   :13.81                                        
##  3rd Qu.:16.00                                        
##  Max.   :18.00
# Baseline averages over all rows of the full data set.
# `[[` extracts each column by its exact name.
mean(df[["distance"]])
## [1] 1.80287
mean(df[["wage"]])
## [1] 9.500506
mean(df[["score"]])
## [1] 50.88903
# Keep only the rows whose tuition exceeds 1.127 (the 3rd quartile reported by
# summary(df)), restricted to six columns.
# Explicit `[` indexing replaces subset(), which its documentation describes as
# a convenience for interactive use. which() drops rows where the comparison
# is NA, matching what subset() did.
new_df <- df[
  which(df[["tuition"]] > 1.127),
  c("gender", "score", "distance", "unemp", "wage", "tuition"),
  drop = FALSE
]

# Append "_mod" to every column name so the subset's columns are easy to tell
# apart from those of the original data frame.
colnames(new_df) <- paste0(colnames(new_df), "_mod")

# Preview the first six rows of the subset.
head(new_df)
##    gender_mod score_mod distance_mod unemp_mod wage_mod tuition_mod
## 88     female     51.32          1.2       5.9     7.09     1.38568
## 89     female     36.37          1.2       5.9     7.09     1.38568
## 90     female     43.80          1.2       5.9     7.09     1.38568
## 91     female     45.14          1.2       5.9     7.09     1.38568
## 92     female     63.26          1.2       5.9     7.09     1.38568
## 93     female     51.32          1.2       5.9     7.09     1.38568
# Per-column summary of the high-tuition subset, for comparison with the
# summary of the full data set.
summary(new_df)
##   gender_mod          score_mod      distance_mod      unemp_mod    
##  Length:1246        Min.   :32.52   Min.   : 0.000   Min.   : 2.50  
##  Class :character   1st Qu.:46.41   1st Qu.: 0.300   1st Qu.: 6.60  
##  Mode  :character   Median :53.29   Median : 0.800   Median : 8.00  
##                     Mean   :52.61   Mean   : 1.298   Mean   : 8.55  
##                     3rd Qu.:58.99   3rd Qu.: 1.500   3rd Qu.:10.20  
##                     Max.   :69.90   Max.   :11.000   Max.   :17.70  
##     wage_mod      tuition_mod   
##  Min.   : 7.09   Min.   :1.127  
##  1st Qu.: 9.64   1st Qu.:1.152  
##  Median : 9.96   Median :1.166  
##  Mean   :10.27   Mean   :1.223  
##  3rd Qu.:11.62   3rd Qu.:1.248  
##  Max.   :12.15   Max.   :1.404
# Averages over the high-tuition subset, using the same three measures that
# were averaged for the full data set.
mean(new_df[["distance_mod"]])
## [1] 1.297592
mean(new_df[["wage_mod"]])
## [1] 10.26884
mean(new_df[["score_mod"]])
## [1] 52.60662
# For the higher-tuition subset (tuition > 1.127), the average wage and score went up, while the average distance from school went down.
# Relabel the gender values in the subset: "male" -> "men", "female" -> "women".
# The two replacements are independent because neither new label matches an
# old one.
new_df[["gender_mod"]][new_df[["gender_mod"]] == "male"] <- "men"
new_df[["gender_mod"]][new_df[["gender_mod"]] == "female"] <- "women"

# Preview the first six rows to confirm the relabelling.
head(new_df)
##    gender_mod score_mod distance_mod unemp_mod wage_mod tuition_mod
## 88      women     51.32          1.2       5.9     7.09     1.38568
## 89      women     36.37          1.2       5.9     7.09     1.38568
## 90      women     43.80          1.2       5.9     7.09     1.38568
## 91      women     45.14          1.2       5.9     7.09     1.38568
## 92      women     63.26          1.2       5.9     7.09     1.38568
## 93      women     51.32          1.2       5.9     7.09     1.38568