library (car)
##
## Attaching package: 'car'
##
## The following object is masked _by_ '.GlobalEnv':
##
## Bfox
data(Bfox)
year=1946:1975
Bfox=data.frame(Bfox,year)
head(Bfox)
## partic tfr menwage womwage debt parttime year
## 1946 25.3 3748 25.35 14.05 18.18 10.28 1946
## 1947 24.4 3996 26.14 14.61 28.33 9.28 1947
## 1948 24.2 3725 25.11 14.23 30.55 9.51 1948
## 1949 24.2 3750 25.45 14.61 35.81 8.87 1949
## 1950 23.7 3669 26.79 15.26 38.39 8.54 1950
## 1951 24.2 3682 26.33 14.58 26.52 8.84 1951
Bfox["1973","tfr"]=1931
spm(Bfox)
Question 1
That means that we are looking at how the amount of babies being born each year, out of a total of 1000 women at similar age and fertility, changes.
Part a)
cor(Bfox)
## partic tfr menwage womwage debt parttime
## partic 1.0000000 -0.9011244 0.9595042 0.9673851 0.9818953 0.9504080
## tfr -0.9011244 1.0000000 -0.8117594 -0.8721014 -0.8695730 -0.8960583
## menwage 0.9595042 -0.8117594 1.0000000 0.9829573 0.9861088 0.8532775
## womwage 0.9673851 -0.8721014 0.9829573 1.0000000 0.9867513 0.8715406
## debt 0.9818953 -0.8695730 0.9861088 0.9867513 1.0000000 0.8874899
## parttime 0.9504080 -0.8960583 0.8532775 0.8715406 0.8874899 1.0000000
## year 0.9531445 -0.7786034 0.9890913 0.9637248 0.9804528 0.8458774
## year
## partic 0.9531445
## tfr -0.7786034
## menwage 0.9890913
## womwage 0.9637248
## debt 0.9804528
## parttime 0.8458774
## year 1.0000000
Part b)
tfr.sum=summary(Bfox$tfr)
str(tfr.sum)
## Classes 'summaryDefault', 'table' Named num [1:6] 1866 2730 3749 3432 4046 ...
## ..- attr(*, "names")= chr [1:6] "Min." "1st Qu." "Median" "Mean" ...
tfrsum=unlist(tfr.sum)
str(tfrsum)
## Classes 'summaryDefault', 'table' Named num [1:6] 1866 2730 3749 3432 4046 ...
## ..- attr(*, "names")= chr [1:6] "Min." "1st Qu." "Median" "Mean" ...
tfrmin=tfrsum[1]
tfr1Q=tfrsum[2]
tfrMed=tfrsum[3]
tfrMean=tfrsum[4]
tfr3Q=tfrsum[5]
tfrMax=tfrsum[6]
The range of the data is 1866 to 4168. 25% of my vaues are below 2730 and 75% of my data is below 4046. The Median of my data is 3749 and the mean is 3432.
Part c)
plot(density(Bfox$tfr))
This graph shows a higher density of briths at around 4000.
Part d)
Bfox.d=(Bfox$tfr<3000)
str(Bfox.d)
## logi [1:30] FALSE FALSE FALSE FALSE FALSE FALSE ...
tfryears=Bfox[Bfox.d,2]
str(tfryears)
## num [1:9] 2879 2681 2563 2571 2503 ...
yearsd=Bfox[Bfox.d,7]
str(yearsd)
## int [1:9] 1967 1968 1969 1970 1971 1972 1973 1974 1975
The years that have the tfr values that are less than 3000 are 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975.
Part e)
Bfox.e=(Bfox$tfr<2000)
tfryears.e=Bfox[Bfox.e,2]
str(tfryears.e)
## num [1:3] 1931 1875 1866
years.e=Bfox[Bfox.e,7]
str(years.e)
## int [1:3] 1973 1974 1975
The years that show a tfr value that are less than 2000 are 1973, 1974, 1975
Part f)
Bfox.f=(Bfox$tfr>3432)
tfryears.f=Bfox[Bfox.f,2]
years.f=Bfox[Bfox.f,7]
The years that have a tfr that is less than the tfr mean 3432 are 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965.
Question 2
Part a)
L3Q2lm = lm(tfr ~ partic, data=Bfox)
Part b)
plot(tfr ~ partic, data=Bfox)
abline(L3Q2lm)
Part c)
L3Q2c.sum=summary(L3Q2lm)$r.squared
L3Q2r=L3Q2c.sum[1]
The r value is 0.8120253. I think this is fairly significant.
–
Part d)
prediction.2.d=predict(L3Q2lm, data.frame(partic=c(30,50)))
str(prediction.2.d)
## Named num [1:2] 3431 1081
## - attr(*, "names")= chr [1:2] "1" "2"
pred30=prediction.2.d[1]
pred50=prediction.2.d[2]
That redicts that out of 1000 women there would be 3431.1416218 babies when 30% of the women are in the workforce and 1080.8726171 when 50% of women are in the workplace. I do think this is fairly reasonable for the 30% value but fairly low when you look at the 50% value with every other woman only having 1 child.
Part e)
coef(L3Q2lm)
## (Intercept) partic
## 6956.5451 -117.5135
Question 3
Part a)
L3Q3lm=lm(tfr ~ year, data=Bfox)
plot(tfr ~ year, data=Bfox)
abline(L3Q3lm)
summary(L3Q3lm)$r.squared
## [1] 0.6062232
summary(L3Q3lm)$fstatistic
## value numdf dendf
## 43.10627 1.00000 28.00000
Part b)
predtfr3=predict(L3Q3lm, data.frame(year=c(2014)))
predtfr=predtfr3[1]
This would predict that in 2014 the average number of babies per woman would be -240.173452. This is obviously a very unreasonable number.
Part c)
L3Q3clm=lm(year ~ tfr, data=Bfox)
plot(year ~ tfr, data=Bfox)
abline(L3Q3clm)
summary(L3Q3clm)$r.squared
## [1] 0.6062232
summary(L3Q3clm)$fstatistic
## value numdf dendf
## 43.10627 1.00000 28.00000
predyear3=predict(L3Q3clm, data.frame(tfr=c(2)))
year3=predyear3[1]
This would predict that in year 1990.7937737 the tfr values would drop below 2.
Part d)
coef(L3Q3lm)
## (Intercept) year
## 137980.71376 -68.63003
Question 4
L3Q4lm=lm(tfr ~ partic+menwage+womwage+debt+parttime+year, data=Bfox)
Part a)
L3Q4c=coef(L3Q4lm)
print(L3Q4c)
## (Intercept) partic menwage womwage debt
## -2.223686e+05 -3.375223e-01 5.324116e+01 -9.771878e+01 -2.144573e+01
## parttime year
## -9.146597e+01 1.168562e+02
particc=L3Q4c[2]
menwagec=L3Q4c[3]
womwagec=L3Q4c[4]
debtc=L3Q4c[5]
parttimec=L3Q4c[6]
yearc=L3Q4c[7]
The corellation coeffients for partic is -0.3375223, menwage 53.2411631, womwage -97.7187849, debt -21.445725, parttime -91.4659668, and year 116.8561914.
Part b)
Question 5
Part a)
L3Q5lm=lm(tfr ~ partic-year, data=Bfox)
L3Q5an=anova(L3Q5lm)
print(L3Q5an)
## Analysis of Variance Table
##
## Response: tfr
## Df Sum Sq Mean Sq F value Pr(>F)
## partic 1 14179637 14179637 120.96 1.132e-11 ***
## Residuals 28 3282427 117230
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(L3Q5lm)
## (Intercept) partic
## 6956.5451 -117.5135