State by State
# Count the rows of ZipGeography for every (CityName, State) pair, order the
# pairs from the largest count to the smallest, then drop rows containing NA.
# NOTE(review): `states` is the number of rows per city/state pair, not the
# number of distinct states a city name occurs in -- confirm this is intended.
numstates <- ZipGeography %>%
  group_by(CityName, State) %>%
  summarise(states = n()) %>%
  arrange(desc(states)) %>%
  na.omit()
Babies and the Bible
# Download the table of Bible names.
BibleNames <- read.csv(file = "http://tiny.cc/dcf/BibleNames.csv")

# Join the Bible names to BabyNames and tally the matching rows for each
# (year, sex) combination. merge() joins on the columns the two tables have in
# common (presumably `name` -- confirm against the CSV header).
# NOTE(review): `total` is a row count (n()), not a sum of the `count` column.
BibleCount <- BibleNames %>%
  merge(BabyNames) %>%
  group_by(year, sex) %>%
  summarise(total = n())
# Bar chart of BibleCount with one bar per sex: rows that share a sex (one per
# year) are stacked on top of each other, and bars are coloured by sex.
ggplot(BibleCount, aes(x = sex, y = total, fill = sex)) +
  geom_bar(stat = "identity", position = "stack") # position_stack(width=.9))
# For every Bible name, count the merged rows recorded under each sex, place
# the two counts side by side (0 where a sex never occurs), and keep only the
# names whose male-to-female ratio is below 1.
# NOTE(review): `total` is a row count (n()), not a sum of the `count` column
# -- confirm that this is the intended measure of popularity.
BibleGirls <- merge(BibleNames, BabyNames) %>%
  group_by(name, sex) %>%
  summarise(total = n()) %>%
  spread(key = sex, value = total, fill = 0) %>%
  # Plain ratio: the former single-argument pmin() returned M / F unchanged.
  mutate(ratio = M / F) %>%
  # A name with F == 0 gets ratio Inf, which fails `ratio < 1` and is dropped.
  filter(ratio < 1)
Gender-Neutral Names
# Reshape BabyNames so the `count` values for each sex sit in their own F and M
# columns, then keep only the rows where both counts exceed 1. Rows lacking one
# of the sexes have NA in that column and are removed by filter().
BothSexes <- BabyNames %>%
  spread(key = sex, value = count) %>%
  filter(F > 1, M > 1)
# Per-row balance is |log(F / M)|, which is 0 when the two counts are equal.
# For each name, total the F and M counts and average the per-row balance, then
# list the most evenly balanced names first.
# Note: totalBalance is the mean of the per-row balances, not the balance
# computed from totalF and totalM.
Balance <- BothSexes %>%
  mutate(balance = abs(log(F / M))) %>%
  select(name, F, M, balance) %>%
  group_by(name) %>%
  summarise(
    totalF = sum(F),
    totalM = sum(M),
    totalBalance = mean(balance)
  ) %>%
  arrange(totalBalance)

# Preview the ten best-balanced names.
head(Balance, n = 10)
## Source: local data frame [10 x 4]
##
## name totalF totalM totalBalance
## (fctr) (int) (int) (dbl)
## 1 Tiney 5 5 0
## 2 Lenzy 11 11 0
## 3 Caro 5 5 0
## 4 Acey 5 5 0
## 5 Gustie 5 5 0
## 6 Quinnie 5 5 0
## 7 Dela 5 5 0
## 8 Ebba 5 5 0
## 9 Emer 5 5 0
## 10 Hart 7 7 0
# Keep only the names whose totalF exceeds 100, preserving the ordering of
# Balance, and preview the first ten of them.
Balance1 <- Balance %>%
  filter(totalF > 100)
head(Balance1, n = 10)
## Source: local data frame [10 x 4]
##
## name totalF totalM totalBalance
## (fctr) (int) (int) (dbl)
## 1 Infant 3749 4036 0.1574909
## 2 Eldean 107 102 0.1874680
## 3 Notnamed 643 717 0.1885897
## 4 Sevyn 111 113 0.1936266
## 5 Paxtyn 125 111 0.1976434
## 6 Lakota 1207 1089 0.2070751
## 7 Tennessee 146 145 0.2128245
## 8 Velmer 169 154 0.2200691
## 9 Dail 125 134 0.2255523
## 10 Riyan 261 253 0.2288037