# Lesson 2

# Make the lesson folder the working directory so the relative CSV paths
# used below resolve. NOTE(review): this absolute path is machine-specific.
setwd(dir = "D:/R/Udacity/L2/EDA_Course_Materials/lesson2")

# Load the state data set from the working directory.
statesInfo <- read.csv(file = "stateData.csv")

# Print the rows whose state.region code equals 1, using subset().
subset(x = statesInfo, subset = state.region == 1)
##                X state.abb state.area state.region population income
## 7    Connecticut        CT       5009            1       3100   5348
## 19         Maine        ME      33215            1       1058   3694
## 21 Massachusetts        MA       8257            1       5814   4755
## 29 New Hampshire        NH       9304            1        812   4281
## 30    New Jersey        NJ       7836            1       7333   5237
## 32      New York        NY      49576            1      18076   4903
## 38  Pennsylvania        PA      45333            1      11860   4449
## 39  Rhode Island        RI       1214            1        931   4558
## 45       Vermont        VT       9609            1        472   3907
##    illiteracy life.exp murder highSchoolGrad frost  area
## 7         1.1    72.48    3.1           56.0   139  4862
## 19        0.7    70.39    2.7           54.7   161 30920
## 21        1.1    71.83    3.3           58.5   103  7826
## 29        0.7    71.23    3.3           57.6   174  9027
## 30        1.1    70.93    5.2           52.5   115  7521
## 32        1.4    70.55   10.9           52.7    82 47831
## 38        1.0    70.43    6.1           50.2   126 44966
## 39        1.3    71.90    2.4           46.4   127  1049
## 45        0.6    71.64    5.5           57.1   168  9267
# Same region-1 filter, this time with bracket indexing: a logical row
# selector on the left of the comma, every column kept on the right.
stateSubsetBracket <- statesInfo[statesInfo[["state.region"]] == 1, ]

# Preview the first six rows of the result.
head(stateSubsetBracket, n = 6L)
##                X state.abb state.area state.region population income
## 7    Connecticut        CT       5009            1       3100   5348
## 19         Maine        ME      33215            1       1058   3694
## 21 Massachusetts        MA       8257            1       5814   4755
## 29 New Hampshire        NH       9304            1        812   4281
## 30    New Jersey        NJ       7836            1       7333   5237
## 32      New York        NY      49576            1      18076   4903
##    illiteracy life.exp murder highSchoolGrad frost  area
## 7         1.1    72.48    3.1           56.0   139  4862
## 19        0.7    70.39    2.7           54.7   161 30920
## 21        1.1    71.83    3.3           58.5   103  7826
## 29        0.7    71.23    3.3           57.6   174  9027
## 30        1.1    70.93    5.2           52.5   115  7521
## 32        1.4    70.55   10.9           52.7    82 47831
# Report the subset's size as (row count, column count).
c(nrow(stateSubsetBracket), ncol(stateSubsetBracket))
## [1]  9 12
# Load the reddit survey responses from the working directory.
# stringsAsFactors is set explicitly: since R 4.0.0 read.csv() leaves text
# columns as character by default, which would make levels(reddit$age.range)
# return NULL and change the summary() output recorded further down.
reddit <- read.csv("reddit.csv", stringsAsFactors = TRUE)

# Count respondents in each employment.status category.
table(reddit$employment.status)
## 
##                    Employed full time 
##                                 14814 
##                             Freelance 
##                                  1948 
## Not employed and not looking for work 
##                                   682 
##    Not employed, but looking for work 
##                                  2087 
##                               Retired 
##                                    85 
##                               Student 
##                                 12987
# Per-column overview of the survey data (output reproduced below).
summary(object = reddit)
##        id            gender          age.range    
##  Min.   :    1   Min.   :0.0000   18-24   :15802  
##  1st Qu.: 8189   1st Qu.:0.0000   25-34   :11575  
##  Median :16380   Median :0.0000   Under 18: 2330  
##  Mean   :16379   Mean   :0.1885   35-44   : 2257  
##  3rd Qu.:24568   3rd Qu.:0.0000   45-54   :  502  
##  Max.   :32756   Max.   :1.0000   (Other) :  200  
##                  NA's   :201      NA's    :   88  
##                                   marital.status 
##  Engaged                                 : 1109  
##  Forever Alone                           : 5850  
##  In a relationship                       : 9828  
##  Married/civil union/domestic partnership: 5490  
##  Single                                  :10428  
##  Widowed                                 :   44  
##  NA's                                    :    5  
##                              employment.status military.service
##  Employed full time                   :14814   No  :30526      
##  Freelance                            : 1948   Yes : 2223      
##  Not employed and not looking for work:  682   NA's:    5      
##  Not employed, but looking for work   : 2087                   
##  Retired                              :   85                   
##  Student                              :12987                   
##  NA's                                 :  151                   
##  children                                  education    
##  No  :27488   Bachelor's degree                 :11046  
##  Yes : 5047   Some college                      : 9600  
##  NA's:  219   Graduate or professional degree   : 4722  
##               High school graduate or equivalent: 3272  
##               Some high school                  : 1924  
##               (Other)                           : 2046  
##               NA's                              :  144  
##            country             state                    income.range 
##  United States :20967             :11908   Under $20,000      :7892  
##  Canada        : 2888   California: 3401   $50,000 - $69,999  :4133  
##  United Kingdom: 1782   Texas     : 1541   $70,000 - $99,999  :4101  
##  Australia     : 1051   New York  : 1418   $100,000 - $149,999:3522  
##  Germany       :  407   Illinois  :  976   $20,000 - $29,999  :3206  
##  (Other)       : 5482   Washington:  910   (Other)            :8285  
##  NA's          :  177   (Other)   :12600   NA's               :1615  
##                fav.reddit               dog.cat            cheese    
##                     : 4335   I like cats.   :11156   Other    :6563  
##  askreddit          : 2123   I like dogs.   :17151   Cheddar  :6102  
##  fffffffuuuuuuuuuuuu: 1746   I like turtles.: 4442   Brie     :3742  
##  pics               : 1651   NA's           :    5   Provolone:3456  
##  trees              : 1311                           Swiss    :3214  
##  (Other)            :21562                           (Other)  :9672  
##  NA's               :   26                           NA's     :   5
# List the age.range levels in their current (stored) order.
levels(reddit[["age.range"]])
## [1] "18-24"       "25-34"       "35-44"       "45-54"       "55-64"      
## [6] "65 or Above" "Under 18"
# Plot the age.range distribution with ggplot2's qplot(), before the
# levels have been re-ordered.
library("ggplot2")
qplot(x = age.range, data = reddit)

# Order the factor levels in the age.range variable in order to create
# a graph with a natural order. Look up the documentation for
# the factor function or read through the example in the Instructor Notes.

# Once you're ready, try to write the code to order the levels of
# the age.range variable.

# Be sure you modify the variable in the data frame. That is, modify
# reddit$age.range. Don't create a new variable.

# The levels of age.range should take on these values...

#    "Under 18", "18-24", "25-34", "35-44", "45-54", "55-64", "65 or Above"

# This exercise is ungraded. You can check your own work by using the Test Run
# button. Your plot will appear there.
library(ggplot2)

# Re-level age.range in place so the brackets run from youngest to oldest.
# Each string must match an existing level exactly: any value that is not
# listed in `levels` is silently converted to NA.
reddit$age.range <- ordered(
  reddit$age.range,
  levels = c(
    "Under 18", "18-24", "25-34", "35-44", "45-54", "55-64", "65 or Above"
  )
)

# Re-draw the plot now that the levels are in their natural order.
qplot(data = reddit, x = age.range)