Reading Dataset

# Directory that holds the homework data. Edit this single value to run the
# script on another machine.
data_dir <- "c:/Users/Melissa/OneDrive/Documents/CUNY/Bridge/R/Homework 2"

# Read the file by its full path rather than calling setwd(), so the
# session's working directory is left untouched.
data <- read.csv(file.path(data_dir, "scorecard.csv"))

# View() opens a data viewer, which only makes sense in an interactive
# session; skip it when the script is run in batch mode.
if (interactive()) {
  View(data)
}

Cleaned data without NA values and Summary

# Drop rows with a missing value in each of the three analysis columns, one
# column at a time. Each intermediate data frame is kept under its own name.
without_na_earnings_med <- data[!is.na(data[, "earnings_med"]), ]
without_na_count_not_working <- without_na_earnings_med[
  !is.na(without_na_earnings_med[, "count_not_working"]),
]
without_na <- without_na_count_not_working[
  !is.na(without_na_count_not_working[, "count_working"]),
]

# Per-column summary of the fully filtered data frame.
summary(without_na)
##        X             unitid        inst_name          state_abbr       
##  Min.   :    1   Min.   :100654   Length:31190       Length:31190      
##  1st Qu.: 8995   1st Qu.:160612   Class :character   Class :character  
##  Median :17919   Median :208406   Mode  :character   Mode  :character  
##  Mean   :18289   Mean   :251936                                        
##  3rd Qu.:27587   3rd Qu.:380359                                        
##  Max.   :37500   Max.   :485458                                        
##  pred_degree_awarded_ipeds      year       earnings_med    count_not_working
##  Min.   :1.000             Min.   :2007   Min.   :  8400   Min.   :    0.0  
##  1st Qu.:1.000             1st Qu.:2009   1st Qu.: 24900   1st Qu.:   53.0  
##  Median :2.000             Median :2011   Median : 31600   Median :  124.0  
##  Mean   :1.988             Mean   :2011   Mean   : 33223   Mean   :  386.1  
##  3rd Qu.:3.000             3rd Qu.:2013   3rd Qu.: 39500   3rd Qu.:  314.8  
##  Max.   :3.000             Max.   :2014   Max.   :171900   Max.   :15960.0  
##  count_working  
##  Min.   :   50  
##  1st Qu.:  275  
##  Median :  670  
##  Mean   : 2233  
##  3rd Qu.: 1613  
##  Max.   :94724

Mean and Median for at least two attributes

# Mean and median of the pred_degree_awarded_ipeds column.
mean_pre_degree <- mean(without_na$pred_degree_awarded_ipeds)
print(mean_pre_degree)
## [1] 1.988169
median_pre_degree <- median(without_na$pred_degree_awarded_ipeds)
print(median_pre_degree)
## [1] 2
# Mean and median of the earnings_med column.
mean_earnings_med <- mean(without_na$earnings_med)
print(mean_earnings_med)
## [1] 33223.09
# Stored under its own name: assigning this to median_pre_degree would
# overwrite the degree median computed above.
median_earnings_med <- median(without_na$earnings_med)
print(median_earnings_med)
## [1] 31600

Create a new data frame from the original data frame (mean earnings aggregated by degree level)

# Mean of earnings_med within each distinct pred_degree_awarded_ipeds value.
# The result has one row per group, with columns Group.1 (the group key)
# and x (the group mean).
list_income <- with(
  without_na,
  aggregate(x = earnings_med, by = list(pred_degree_awarded_ipeds), FUN = mean)
)
print(list_income)
##   Group.1        x
## 1       1 25188.58
## 2       2 31403.78
## 3       3 42877.52

Create new column names for the new data frame

# Copy the aggregated data frame and give its two columns descriptive names.
list_with_names <- list_income
names(list_with_names) <- c("Degree_Award_Income", "Averaged_Earned_Income")
print(list_with_names)
##   Degree_Award_Income Averaged_Earned_Income
## 1                   1               25188.58
## 2                   2               31403.78
## 3                   3               42877.52

Summary of new data frame

# Per-column summary statistics of the renamed aggregate data frame.
summary(object = list_with_names)
##  Degree_Award_Income Averaged_Earned_Income
##  Min.   :1.0         Min.   :25189         
##  1st Qu.:1.5         1st Qu.:28296         
##  Median :2.0         Median :31404         
##  Mean   :2.0         Mean   :33157         
##  3rd Qu.:2.5         3rd Qu.:37141         
##  Max.   :3.0         Max.   :42878

Mean and Median of new data frame

# Mean and median of the Degree_Award_Income column of the aggregate frame.
mean_Degree_Award_Income <- mean(list_with_names[["Degree_Award_Income"]])
print(mean_Degree_Award_Income)
## [1] 2
median_Degree_Award_Income <- median(list_with_names[["Degree_Award_Income"]])
print(median_Degree_Award_Income)
## [1] 2
# Mean and median of the Averaged_Earned_Income column of the aggregate frame.
mean_Averaged_Earned_Income <- mean(list_with_names[["Averaged_Earned_Income"]])
print(mean_Averaged_Earned_Income)
## [1] 33156.63
median_Averaged_Earned_Income <- median(list_with_names[["Averaged_Earned_Income"]])
print(median_Averaged_Earned_Income)
## [1] 31403.78
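The mean and median of the new data frame differ from those of the original data because the new data frame has only three rows (one mean per degree level), so each degree level is weighted equally regardless of how many rows of the original data it contains.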

Values in a column renamed

# Replace three institution names with new labels. The old -> new pairs are
# applied one after another, in the order listed, inside local() so the
# helper variables do not leak into the workspace.
without_na$inst_name <- local({
  replacements <- c(
    "Alabama A & M University" = "Vikas",
    "University of Alabama at Birmingham" = "Mikas",
    "University of Alabama in Huntsville" = "Bikas"
  )
  inst_names <- without_na$inst_name
  for (old_name in names(replacements)) {
    inst_names[inst_names == old_name] <- replacements[[old_name]]
  }
  inst_names
})

Displayed enough rows

# Show the first five rows so the renamed institutions are visible.
# head() returns at most five rows, whereas indexing with 1:5 would pad a
# shorter data frame with all-NA rows.
# View() needs an interactive session, so it is skipped in batch runs.
if (interactive()) {
  View(head(without_na, 5))
}

print(head(without_na, 5))
##   X unitid                 inst_name state_abbr pred_degree_awarded_ipeds year
## 1 1 100654                     Vikas         AL                         3 2007
## 2 2 100663                     Mikas         AL                         3 2007
## 4 4 100706                     Bikas         AL                         3 2007
## 5 5 100724  Alabama State University         AL                         3 2007
## 6 6 100751 The University of Alabama         AL                         3 2007
##   earnings_med count_not_working count_working
## 1        36600               116          1139
## 2        40800               366          2636
## 4        49300               122           975
## 5        30500               210          1577
## 6        46700               292          2754