Reading Dataset
# Load the scorecard CSV from the homework folder.
# NOTE(review): the absolute, user-specific path below only resolves on the
# original author's machine; consider a project-relative path instead.
setwd("c:/Users/Melissa/OneDrive/Documents/CUNY/Bridge/R/Homework 2")
data <- read.csv("scorecard.csv")
# View() opens a GUI data viewer, so only call it when a user is present;
# in batch or knitted runs it can fail or block.
if (interactive()) {
  View(data)
}
Clean the data by removing rows with NA values, then summarize
# Remove rows with missing values one column at a time. Each intermediate
# data frame keeps its own name so every filtering stage stays inspectable.
without_na_earnings_med <- data[!is.na(data$earnings_med), , drop = FALSE]
without_na_count_not_working <- without_na_earnings_med[
  !is.na(without_na_earnings_med$count_not_working), ,
  drop = FALSE
]
without_na <- without_na_count_not_working[
  !is.na(without_na_count_not_working$count_working), ,
  drop = FALSE
]
# Column-by-column summary of the fully filtered data frame.
summary(without_na)
## X unitid inst_name state_abbr
## Min. : 1 Min. :100654 Length:31190 Length:31190
## 1st Qu.: 8995 1st Qu.:160612 Class :character Class :character
## Median :17919 Median :208406 Mode :character Mode :character
## Mean :18289 Mean :251936
## 3rd Qu.:27587 3rd Qu.:380359
## Max. :37500 Max. :485458
## pred_degree_awarded_ipeds year earnings_med count_not_working
## Min. :1.000 Min. :2007 Min. : 8400 Min. : 0.0
## 1st Qu.:1.000 1st Qu.:2009 1st Qu.: 24900 1st Qu.: 53.0
## Median :2.000 Median :2011 Median : 31600 Median : 124.0
## Mean :1.988 Mean :2011 Mean : 33223 Mean : 386.1
## 3rd Qu.:3.000 3rd Qu.:2013 3rd Qu.: 39500 3rd Qu.: 314.8
## Max. :3.000 Max. :2014 Max. :171900 Max. :15960.0
## count_working
## Min. : 50
## 1st Qu.: 275
## Median : 670
## Mean : 2233
## 3rd Qu.: 1613
## Max. :94724
Compute the mean of earnings_med for each pred_degree_awarded_ipeds group in the cleaned data frame
# Mean of earnings_med within each pred_degree_awarded_ipeds group.
# The grouping list is unnamed, so the result columns are Group.1 and x.
list_income <- aggregate(
  x = without_na$earnings_med,
  by = list(without_na$pred_degree_awarded_ipeds),
  FUN = mean
)
print(list_income)
## Group.1 x
## 1 1 25188.58
## 2 2 31403.78
## 3 3 42877.52
Create new column names for the new data frame
# Copy the aggregated result and give its two columns descriptive names,
# leaving list_income itself untouched.
list_with_names <- list_income
names(list_with_names) <- c("Degree_Award_Income", "Averaged_Earned_Income")
print(list_with_names)
## Degree_Award_Income Averaged_Earned_Income
## 1 1 25188.58
## 2 2 31403.78
## 3 3 42877.52
Summary of new data frame
# Summarize both columns of the renamed data frame and print the result
# explicitly rather than relying on top-level auto-printing.
print(summary(list_with_names))
## Degree_Award_Income Averaged_Earned_Income
## Min. :1.0 Min. :25189
## 1st Qu.:1.5 1st Qu.:28296
## Median :2.0 Median :31404
## Mean :2.0 Mean :33157
## 3rd Qu.:2.5 3rd Qu.:37141
## Max. :3.0 Max. :42878
Rename selected values in the inst_name column
# Replace three institution names with placeholder names using one lookup
# table (original name -> replacement). Names not in the table are unchanged.
without_na$inst_name <- local({
  replacements <- c(
    "Alabama A & M University" = "Vikas",
    "University of Alabama at Birmingham" = "Mikas",
    "University of Alabama in Huntsville" = "Bikas"
  )
  current <- without_na$inst_name
  # Position of each row's name in the lookup table; NA means "no replacement".
  position <- match(current, names(replacements))
  found <- !is.na(position)
  current[found] <- unname(replacements[position[found]])
  current
})
Display the first five rows to show the renamed values
# Show the first five rows of the cleaned data frame.
# head() returns at most five rows, whereas indexing with 1:5 would pad the
# result with all-NA rows when fewer than five rows are available.
first_rows <- head(without_na, 5)
# View() opens a GUI data viewer, so only call it when a user is present.
if (interactive()) {
  View(first_rows, title = "without_na (first 5 rows)")
}
print(first_rows)
## X unitid inst_name state_abbr pred_degree_awarded_ipeds year
## 1 1 100654 Vikas AL 3 2007
## 2 2 100663 Mikas AL 3 2007
## 4 4 100706 Bikas AL 3 2007
## 5 5 100724 Alabama State University AL 3 2007
## 6 6 100751 The University of Alabama AL 3 2007
## earnings_med count_not_working count_working
## 1 36600 116 1139
## 2 40800 366 2636
## 4 49300 122 975
## 5 30500 210 1577
## 6 46700 292 2754