library(plyr)
library(RCurl)
# 1 read in data & print summary
# Fetch the CSV as one text string, parse it using column "X" as the row
# names, then summarise the first two columns.
cowles_url <- "https://raw.githubusercontent.com/KevinJpotter/edx_capstone/master/Cowles.csv"
x <- getURL(cowles_url)
df <- read.csv(text = x, header = TRUE, row.names = "X")
summary(df[1:2])
##   neuroticism     extraversion  
##  Min.   : 0.00   Min.   : 2.00  
##  1st Qu.: 8.00   1st Qu.:10.00  
##  Median :11.00   Median :13.00  
##  Mean   :11.47   Mean   :12.37  
##  3rd Qu.:15.00   3rd Qu.:15.00  
##  Max.   :24.00   Max.   :23.00
# 2 create new df with fewer rows & cols
# Keep only the rows where volunteer is "no", and only the three listed
# columns.
new_df <- subset(
  df,
  volunteer == "no",
  select = c(neuroticism, extraversion, sex)
)

# Report both sizes. cat() does not add a line feed on its own, so the
# format string ends with "\n" to keep later output on its own line.
cat(sprintf(
  "The old df size is %d by %d    The new_df size is %d by %d\n",
  nrow(df), ncol(df), nrow(new_df), ncol(new_df)
))
## The old df size is 1421 by 4    The new_df size is 824 by 3
# 3 rename cols
# plyr::rename() takes a named character vector: names are the old column
# names, values are the new ones.
new_df <- rename(
  new_df,
  replace = c(neuroticism = "neuro", extraversion = "extra", sex = "m/f")
)
head(new_df)
##   neuro extra    m/f
## 1    16    13 female
## 2     8    14   male
## 3     5    16   male
## 4     8    20 female
## 5     9    19   male
## 6     6    15   male
# 4 summary of the first two columns of new_df
summary(new_df[1:2])
##      neuro           extra      
##  Min.   : 0.00   Min.   : 2.00  
##  1st Qu.: 8.00   1st Qu.: 9.00  
##  Median :11.00   Median :12.00  
##  Mean   :11.42   Mean   :11.96  
##  3rd Qu.:15.00   3rd Qu.:15.00  
##  Max.   :23.00   Max.   :23.00
# 5 & 6 rename values and show results
# Show the first six rows before any values are replaced.
head(new_df, n = 6L)
##   neuro extra    m/f
## 1    16    13 female
## 2     8    14   male
## 3     5    16   male
## 4     8    20 female
## 5     9    19   male
## 6     6    15   male
# Replace the values 13, 14 and 16 in the extra column with 1300, 1400 and
# 1600 respectively, then show the first ten rows.
new_df[["extra"]] <- mapvalues(
  new_df[["extra"]],
  from = c(13, 14, 16),
  to = c(1300, 1400, 1600)
)
head(new_df, n = 10)
##    neuro extra    m/f
## 1     16  1300 female
## 2      8  1400   male
## 3      5  1600   male
## 4      8    20 female
## 5      9    19   male
## 6      6    15   male
## 7      8    10 female
## 8     12    11   male
## 9     15  1600   male
## 10    18     7   male
# 7 see step 1