reddit <- read.csv('reddit.csv')
str(reddit)
## 'data.frame': 32754 obs. of 14 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ gender : int 0 0 1 0 1 0 0 0 0 0 ...
## $ age.range : Factor w/ 7 levels "18-24","25-34",..: 2 2 1 2 2 2 2 1 3 2 ...
## $ marital.status : Factor w/ 6 levels "Engaged","Forever Alone",..: NA NA NA NA NA 4 3 4 4 3 ...
## $ employment.status: Factor w/ 6 levels "Employed full time",..: 1 1 2 2 1 1 1 4 1 2 ...
## $ military.service : Factor w/ 2 levels "No","Yes": NA NA NA NA NA 1 1 1 1 1 ...
## $ children : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ education : Factor w/ 7 levels "Associate degree",..: 2 2 5 2 2 2 5 2 2 5 ...
## $ country : Factor w/ 439 levels " Canada"," Canada eh",..: 394 394 394 394 394 394 125 394 394 125 ...
## $ state : Factor w/ 53 levels "","Alabama","Alaska",..: 33 33 48 33 6 33 1 6 33 1 ...
## $ income.range : Factor w/ 8 levels "$100,000 - $149,999",..: 2 2 8 2 7 2 NA 7 2 7 ...
## $ fav.reddit : Factor w/ 1834 levels "","___","-","?",..: 720 691 1511 1528 188 691 1318 571 1629 1 ...
## $ dog.cat : Factor w/ 3 levels "I like cats.",..: NA NA NA NA NA 2 2 2 1 1 ...
## $ cheese : Factor w/ 11 levels "American","Brie",..: NA NA NA NA NA 3 3 1 10 7 ...
qplot(data=reddit,x=age.range)
Now we order the factor age.range
levels(reddit$age.range)
## [1] "18-24" "25-34" "35-44" "45-54" "55-64"
## [6] "65 or Above" "Under 18"
reddit$age.range <- ordered(reddit$age.range,c("Under 18", "18-24", "25-34", "35-44", "45-54", "55-64", "65 or Above"))
levels(reddit$age.range)
## [1] "Under 18" "18-24" "25-34" "35-44" "45-54"
## [6] "55-64" "65 or Above"
qplot(data=reddit,x=age.range)