# Load the `arbuthnot` data frame by running the OpenIntro helper script.
# NOTE(review): the script is fetched over plain http and evaluated by
# source(); consider an https URL or a vetted local copy.
source("http://www.openintro.org/stat/data/arbuthnot.R")
# Preview the first ten rows.
head(arbuthnot, n = 10)
## year boys girls
## 1 1629 5218 4683
## 2 1630 4858 4457
## 3 1631 4422 4102
## 4 1632 4994 4590
## 5 1633 5158 4839
## 6 1634 5035 4820
## 7 1635 5106 4928
## 8 1636 4917 4605
## 9 1637 4703 4457
## 10 1638 5359 4952
# Report the number of rows and columns of `arbuthnot` in one sentence.
print(
  paste(
    "arbuthnot dataset has: rows = ", nrow(arbuthnot),
    "and columns = ", ncol(arbuthnot)
  )
)
## [1] "arbuthnot dataset has: rows = 82 and columns = 3"
# Column names of the data frame.
colnames(arbuthnot)
## [1] "year" "boys" "girls"
# Print the full `boys` column.
arbuthnot[["boys"]]
## [1] 5218 4858 4422 4994 5158 5035 5106 4917 4703 5359 5366 5518 5470 5460
## [15] 4793 4107 4047 3768 3796 3363 3079 2890 3231 3220 3196 3441 3655 3668
## [29] 3396 3157 3209 3724 4748 5216 5411 6041 5114 4678 5616 6073 6506 6278
## [43] 6449 6443 6073 6113 6058 6552 6423 6568 6247 6548 6822 6909 7577 7575
## [57] 7484 7575 7737 7487 7604 7909 7662 7602 7676 6985 7263 7632 8062 8426
## [71] 7911 7578 8102 8031 7765 6113 8366 7952 8379 8239 7840 7640
# Print the full `girls` column.
arbuthnot[["girls"]]
## [1] 4683 4457 4102 4590 4839 4820 4928 4605 4457 4952 4784 5332 5200 4910
## [15] 4617 3997 3919 3395 3536 3181 2746 2722 2840 2908 2959 3179 3349 3382
## [29] 3289 3013 2781 3247 4107 4803 4881 5681 4858 4319 5322 5560 5829 5719
## [43] 6061 6120 5822 5738 5717 5847 6203 6033 6041 6299 6533 6744 7158 7127
## [57] 7246 7119 7214 7101 7167 7302 7392 7316 7483 6647 6713 7229 7767 7626
## [71] 7452 7061 7514 7656 7683 5738 7779 7417 7687 7623 7380 7288
# Number of entries in the `girls` column.
length(arbuthnot[["girls"]])
## [1] 82
# Line plot of the `girls` column against `year`. The argument expressions are
# left as written because plot() uses them as the default axis labels.
plot(
  x = arbuthnot$year,
  y = arbuthnot$girls,
  type = "l"
)
The plot shows a drop in the number of baptised girls between 1640 and 1660, after which the number picks up again and rises gradually.
# Line plot of the yearly proportion of boys, boys / (boys + girls), against
# year. The y expression is kept verbatim since plot() derives the default
# axis label from it.
plot(
  x = arbuthnot$year,
  y = arbuthnot$boys / (arbuthnot$boys + arbuthnot$girls),
  type = "l"
)
The plot shows many peaks, with the proportion rising and falling in a fairly steady pattern until about 1680; from then on the peaks are smaller, indicating an overall decrease, and the proportion looks lower than it was before 1680.
# Load the `present` data frame by running the OpenIntro helper script.
# NOTE(review): the script is fetched over plain http and evaluated by
# source(); consider an https URL or a vetted local copy.
source("http://www.openintro.org/stat/data/present.R")
# Preview the first ten rows.
head(present, n = 10)
## year boys girls
## 1 1940 1211684 1148715
## 2 1941 1289734 1223693
## 3 1942 1444365 1364631
## 4 1943 1508959 1427901
## 5 1944 1435301 1359499
## 6 1945 1404587 1330869
## 7 1946 1691220 1597452
## 8 1947 1899876 1800064
## 9 1948 1813852 1721216
## 10 1949 1826352 1733177
# Print every year covered by `present`.
present[["year"]]
## [1] 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953
## [15] 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967
## [29] 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981
## [43] 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995
## [57] 1996 1997 1998 1999 2000 2001 2002
# Report the number of rows and columns of `present` in one sentence.
print(
  paste(
    "present dataset has: rows = ", nrow(present),
    "and columns = ", ncol(present)
  )
)
## [1] "present dataset has: rows = 63 and columns = 3"
# Column names of the data frame.
colnames(present)
## [1] "year" "boys" "girls"
library(compare)
##
## Attaching package: 'compare'
## The following object is masked from 'package:base':
##
## isTRUE
# Side-by-side table of the row and column counts of the two datasets.
data.frame(
  present = c(rows = nrow(present), cols = ncol(present)),
  arbuthnot = c(rows = nrow(arbuthnot), cols = ncol(arbuthnot))
)
## present arbuthnot
## rows 63 82
## cols 3 3
# Work on data-frame copies of both datasets.
dfa <- data.frame(arbuthnot)
dfp <- data.frame(present)
# Compare the copies with compare::compare(), passing allowAll = TRUE.
compare(dfa, dfp, allowAll = TRUE)
## FALSE [FALSE, FALSE, FALSE]
## [year] coerced from <numeric> to <integer>
## [boys] coerced from <numeric> to <integer>
## [girls] coerced from <numeric> to <integer>
## shortened model rows
# Does the Arbuthnot copy have more rows than the present-day copy?
nrow(dfa) > nrow(dfp)
## [1] TRUE
# Repeat the comparison on the scale()d versions of both copies.
compare(scale(dfp), scale(dfa), allowAll = TRUE)
## FALSE
## shortened comparison
## sorted
# Exact-equality check of the two copies.
identical(dfa, dfp)
## [1] FALSE
# Yearly share of boys and of girls among all recorded births, per dataset.
ab <- arbuthnot$boys / (arbuthnot$boys + arbuthnot$girls)
ag <- arbuthnot$girls / (arbuthnot$boys + arbuthnot$girls)
pb <- present$boys / (present$boys + present$girls)
pg <- present$girls / (present$boys + present$girls)

# Average share across all years. A single mean() is enough: taking the mean
# (or median) of an already-scalar mean returns the same value.
mab <- mean(ab, na.rm = TRUE)
mag <- mean(ag, na.rm = TRUE)
mpb <- mean(pb, na.rm = TRUE)
mpg <- mean(pg, na.rm = TRUE)

# Summary table. Each column now holds the means of the dataset it is named
# after; previously the `present` column was filled with the Arbuthnot means
# and vice versa. Row labels are kept as they were.
data.frame(
  present = c(median_mean_boys = mpb, median_mean_girls = mpg),
  arbuthnot = c(median_mean_boys = mab, median_mean_girls = mag)
)
## present arbuthnot
## median_mean_boys 0.512516 0.5169751
## median_mean_girls 0.487484 0.4830249
The values are different and the two datasets do not have the same number of rows, but the proportions are on a similar scale.
# Draw the boy-to-girl ratio of both datasets side by side. par() returns the
# previous settings, which are restored afterwards so that later plots are not
# left in the 1x2 layout.
old_par <- par(mfrow = c(1, 2))
plot(x = arbuthnot$year, y = arbuthnot$boys / arbuthnot$girls, type = "l", col = "blue")
plot(x = present$year, y = present$boys / present$girls, type = "l", col = "red")
par(old_par)
# Is the average yearly share of boys in `present` larger than that of girls?
mean(pb) > mean(pg)
## [1] TRUE
Yes, Arbuthnot’s observation that boys are born in greater proportion than girls holds up in the U.S. too.
# Total births (boys + girls) for every year in `present`, computed in one
# vectorized step rather than by filling a vector element by element.
v <- present$boys + present$girls
# Year with the largest total. which.max() returns the position of the first
# maximum, so if several years tied, the earliest one would be reported.
yr <- present$year[which.max(v)]
print(paste("the year that has the max # of births in US: ", yr))
## [1] "the year that has the max # of births in US: 1961"