#read in data
library(haven)
library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(ggplot2)
library(pander)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.1.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
# Read the Stata file and strip the haven value labels so that the columns
# below can be handled as plain vectors.
# NOTE(review): absolute, machine-specific path -- confirm before running
# on another computer.
uganda16 <- read_dta("C:/Users/rlutt/Downloads/UGIR7BFL.DTA")
uganda16 <- zap_labels(uganda16)

# recodes
# contraception: descriptive label for each v313 code. The earlier
# as.factor(v313) assignment was dropped because this statement overwrote it
# immediately, so it never had any effect.
uganda16$contraception <- car::Recode(
  uganda16$v313,
  recodes = "0='none'; 1='folkloric method'; 2='traditional method' ;3= 'modern method'"
)
uganda16$intention <- as.factor(uganda16$v364)

# usingmoderncontraception: v313 == 0 becomes 1, v313 in 1/2/3 becomes 2.
# NOTE(review): folkloric and traditional methods are grouped with modern
# ones here, so this flags *any* method rather than modern methods only --
# confirm that this is intended.
uganda16$moderncon <- car::Recode(
  uganda16$v313,
  recodes = "0='1'; 1='2'; 2='2'; 3='2'"
)

# 1 when v602 == 1, 0 for every other observed code, NA when v602 is NA.
# The former extra test `v602 != 9` was implied by `v602 == 1` and has been
# removed; the result is unchanged.
uganda16$wantanotherchild <- ifelse(uganda16$v602 == 1, 1, 0)

# decides on use
uganda16$decidecon <- as.factor(uganda16$v632)

# type of method
uganda16$type <- as.factor(uganda16$v304a_01)

# has another kid: sum of v202 and v203
# (presumably the two child-count items -- verify against the codebook)
uganda16$currentchildren <- uganda16$v202 + uganda16$v203

# survey design variables
uganda16$psu <- uganda16$v021
uganda16$strata <- uganda16$v022
uganda16$pwt <- uganda16$v005 / 1000000  # rescale the stored weight by 1e6
desi <- svydesign(
  ids = ~psu,
  strata = ~strata,
  weights = ~pwt,
  data = uganda16
)
#filter
# Build the PCA input: keep rows that are complete on the five analysis
# variables, keep only those columns, and z-score each one.
# across() replaces the superseded mutate_at(). Wrapping scale() in
# as.numeric() matters because scale() returns a one-column matrix carrying
# "scaled:center"/"scaled:scale" attributes; without the conversion every
# column of ug2016 would be a matrix column instead of a plain numeric vector.
# The standardized values themselves are the same as before.
ug2016 <- uganda16 %>%
  filter(complete.cases(v632, wantanotherchild, v313, currentchildren, v106)) %>%
  dplyr::select(v632, wantanotherchild, v313, currentchildren, v106) %>%
  mutate(across(everything(), function(x) as.numeric(scale(x))))
#The variables I chose are not recoded because I needed to keep them numeric to run the analysis. I have run into many problems analysing factor variables: as.numeric() keeps turning my recoded variables into NAs rather than numbers.
#The labels for my variables of choice are as follows: v632 - who decides on contraceptive use; wantanotherchild - whether they want another child or not; v313 - whether they use contraception or not; currentchildren - how many children they have; v106 - education level. I chose these variables because they are related: they all deal with contraceptive use and preferences about having children.
#Principal Components #For this homework, you are to use the technique of Principal Components Analysis (PCA) to perform a variable reduction of at least 5 variables.
library(FactoMineR)
## Warning: package 'FactoMineR' was built under R version 4.1.3
# Fit a PCA on the five analysis columns of ug2016. scale.unit = TRUE asks
# PCA() to standardize each variable to unit variance; graph = FALSE
# suppresses the automatic plots. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be reassigned.
g16.pc <- PCA(ug2016[, 1:5], scale.unit = TRUE, graph = FALSE)
# summary() dispatches to the PCA method, so the method does not need to be
# called by its full name.
summary(g16.pc)
##
## Call:
## PCA(X = ug2016[, 1:5], scale.unit = TRUE, graph = FALSE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## Variance 1.655 1.041 0.958 0.800 0.546
## % of var. 33.103 20.814 19.164 16.002 10.916
## Cumulative % of var. 33.103 53.918 73.081 89.084 100.000
##
## Individuals (the 10 first)
## Dist Dim.1 ctr cos2 Dim.2 ctr cos2
## 1 | 1.805 | 0.136 0.000 0.006 | -1.224 0.034 0.460 |
## 2 | 3.278 | 2.506 0.089 0.584 | -1.494 0.050 0.208 |
## 3 | 1.927 | -0.692 0.007 0.129 | -1.264 0.036 0.430 |
## 4 | 5.835 | 1.565 0.035 0.072 | 4.397 0.435 0.568 |
## 5 | 2.267 | 0.930 0.012 0.168 | 0.062 0.000 0.001 |
## 6 | 2.095 | -0.017 0.000 0.000 | -1.290 0.037 0.379 |
## 7 | 1.927 | -0.692 0.007 0.129 | -1.264 0.036 0.430 |
## 8 | 2.288 | 0.323 0.001 0.020 | -1.363 0.042 0.355 |
## 9 | 1.871 | 1.758 0.044 0.884 | 0.102 0.000 0.003 |
## 10 | 1.712 | -0.430 0.003 0.063 | 0.351 0.003 0.042 |
## Dim.3 ctr cos2
## 1 -0.708 0.012 0.154 |
## 2 -1.033 0.026 0.099 |
## 3 -0.877 0.019 0.207 |
## 4 -3.476 0.295 0.355 |
## 5 0.447 0.005 0.039 |
## 6 -0.942 0.022 0.202 |
## 7 -0.877 0.019 0.207 |
## 8 -1.007 0.025 0.194 |
## 9 0.616 0.009 0.109 |
## 10 0.708 0.012 0.171 |
##
## Variables
## Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3 ctr
## v632 | 0.158 1.515 0.025 | 0.703 47.522 0.495 | 0.685 48.990
## wantanotherchild | 0.750 34.021 0.563 | -0.016 0.026 0.000 | 0.051 0.270
## v313 | -0.032 0.061 0.001 | -0.726 50.648 0.527 | 0.686 49.071
## currentchildren | -0.804 39.056 0.646 | 0.136 1.766 0.018 | 0.117 1.434
## v106 | 0.648 25.347 0.420 | -0.020 0.039 0.000 | -0.047 0.234
## cos2
## v632 0.469 |
## wantanotherchild 0.003 |
## v313 0.470 |
## currentchildren 0.014 |
## v106 0.002 |
#If you have an idea for latent construct, state what you believe this is.
#Empowerment proxy variable #The latent variable in this case is a combination of variables that affect contraceptive use: fertility preferences, contraceptive knowledge, and level of education (research has shown that education affects fertility preferences).
#The first two eigenvalues are above 1, showing that these variables are related and that the PCA is 'somewhat' useful in this situation. However, the variables themselves pull the outcome in different directions, so it is not really a good choice for the construction of a latent variable.
#Ultimately, wantanotherchild and v106 (education level) both load positively on the first component with similar magnitude (0.75 and 0.65), while currentchildren loads negatively (-0.80).
#The variance is relatively spread out across the components, but the first component accounts for over 30% of the variation (33.1%).
#Report the summary statistics and correlation matrix for your data
#Report the results of the PCA, being sure to include the eigenvalues and
#summary statistics
# g16.pc$eig is a matrix with one row per component; its first column holds
# the eigenvalues and the remaining columns hold the (cumulative) percentage
# of variance.
eigenvalues <- g16.pc$eig
# Take the eigenvalue column only. Indexing the matrix linearly with [1:6]
# ran past the five eigenvalues into the percentage-of-variance column, which
# is why a stray 33.10 used to be printed as a sixth "eigenvalue".
head(unname(eigenvalues[, 1]))
## [1] 1.6551683 1.0407132 0.9581893 0.8001228 0.5458065
# Coordinates, correlations, cos2 and contributions of each variable.
g16.pc$var
## $coord
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## v632 0.15833661 0.70325244 0.68514299 0.02342515 -0.10196010
## wantanotherchild 0.75040671 -0.01646499 0.05090487 -0.48747259 0.44316796
## v313 -0.03176579 -0.72601436 0.68570904 0.03952488 -0.01161782
## currentchildren -0.80401781 0.13555022 0.11721917 0.14043541 0.54928959
## v106 0.64771434 -0.02018295 -0.04732671 0.73529599 0.19276611
##
## $cor
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## v632 0.15833661 0.70325244 0.68514299 0.02342515 -0.10196010
## wantanotherchild 0.75040671 -0.01646499 0.05090487 -0.48747259 0.44316796
## v313 -0.03176579 -0.72601436 0.68570904 0.03952488 -0.01161782
## currentchildren -0.80401781 0.13555022 0.11721917 0.14043541 0.54928959
## v106 0.64771434 -0.02018295 -0.04732671 0.73529599 0.19276611
##
## $cos2
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## v632 0.025070481 0.4945639989 0.469420920 0.0005487377 0.0103958618
## wantanotherchild 0.563110229 0.0002710959 0.002591306 0.2376295296 0.1963978394
## v313 0.001009065 0.5270968534 0.470196891 0.0015622162 0.0001349738
## currentchildren 0.646444641 0.0183738633 0.013740334 0.0197221044 0.3017190573
## v106 0.419533864 0.0004073516 0.002239817 0.5406601935 0.0371587736
##
## $contrib
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## v632 1.5146787 47.52164347 48.9904172 0.06858168 1.90467898
## wantanotherchild 34.0213280 0.02604905 0.2704378 29.69913307 35.98305211
## v313 0.0609645 50.64765894 49.0714003 0.19524706 0.02472924
## currentchildren 39.0561279 1.76550696 1.4339896 2.46488475 55.27949081
## v106 25.3469009 0.03914159 0.2337552 67.57215344 6.80804886
#If deemed appropriate, conduct some testing of your index/components/latent variables.
#I am not going to run any tests, since not all components have eigenvalues over 1 (only the first two do), meaning that the variables do not clearly combine into a single "component" that could stand in for a latent variable.