#read in data
library(haven)
library(car)
## Loading required package: carData
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(ggplot2)
library(pander)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.1.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
# Read the Stata file and strip haven's value labels in one step, so the
# coded columns are left holding their underlying values.
uganda16 <- zap_labels(
  read_dta("C:/Users/rlutt/Downloads/UGIR7BFL.DTA")
)
# Recodes ----
# Derived analysis variables built from the raw survey columns. Value labels
# were stripped on load, so the v* columns hold their underlying codes.

# Contraceptive method type (v313) as a factor with readable labels.
# Recode() is asked for a factor directly, with levels in code order. The
# former as.factor() assignment was overwritten on the very next line, and
# Recode() on a non-factor input returns a character vector by default.
uganda16$contraception <- car::Recode(
  uganda16$v313,
  recodes = paste0(
    "0='none'; 1='folkloric method'; ",
    "2='traditional method'; 3='modern method'"
  ),
  as.factor = TRUE,
  levels = c("none", "folkloric method", "traditional method", "modern method")
)

uganda16$intention <- as.factor(uganda16$v364)

# Contraception indicator: 1 = code 0 ('none'), 2 = codes 1-3 (any method).
# NOTE(review): the name suggests "uses a modern method", but this mapping
# sends folkloric, traditional and modern methods all to 2, which makes it an
# any-method indicator. Confirm which is intended before using it.
uganda16$moderncon <- car::Recode(
  uganda16$v313,
  recodes = "0='1'; 1='2'; 2='2'; 3='2'"
)

# Wants another child: 1 when v602 equals 1, 0 for any other code.
# The original test `v602 != 9 & v602 == 1` reduces to `v602 == 1`, so code 9
# was silently coded 0 even though the condition tried to exclude it. Code 9
# is now set to NA so it is dropped by the complete-case filter.
# NOTE(review): assumes 9 is the missing-value code for v602; confirm.
uganda16$wantanotherchild <- ifelse(uganda16$v602 == 1, 1, 0)
uganda16$wantanotherchild[uganda16$v602 %in% 9] <- NA

# Who decides on contraceptive use (v632), as a factor.
uganda16$decidecon <- as.factor(uganda16$v632)

# Type of method (v304a_01), as a factor.
uganda16$type <- as.factor(uganda16$v304a_01)

# Number of current children, computed as v202 + v203.
# NOTE(review): presumably v202/v203 are sons/daughters living at home, which
# would exclude children living elsewhere; verify against the codebook.
uganda16$currentchildren <- uganda16$v202 + uganda16$v203


# Survey design ----
# Add the design columns in a single step: cluster id (v021), stratum (v022)
# and the sampling weight (v005 divided by 1,000,000).
uganda16 <- uganda16 %>%
  mutate(
    psu = v021,
    strata = v022,
    pwt = v005 / 1000000
  )

# Stratified cluster design object built from the columns above.
desi <- svydesign(
  ids = ~psu,
  strata = ~strata,
  weights = ~pwt,
  data = uganda16
)
# Analysis data for the PCA ----
# Keep the five PCA inputs, drop every row with a missing value on any of
# them, and standardise each column to mean 0 and standard deviation 1.
ug2016 <- uganda16 %>%
  dplyr::select(v632, wantanotherchild, v313, currentchildren, v106) %>%
  filter(complete.cases(.)) %>%
  # across() replaces the superseded mutate_at(). scale() returns a
  # one-column matrix, so as.numeric() is applied to keep each column an
  # ordinary numeric vector rather than a matrix stored inside the data frame.
  mutate(across(everything(), function(x) as.numeric(scale(x))))

#The variables I chose are not recoded because I needed to keep them numeric to run the analysis. I’ve encountered a lot of issues when running an analysis on factor variables, and I keep running into the problem of the as.numeric function turning my factor variables into NAs rather than numbers.

#The labels for my variables of choice are as follows: v632 - who decides on contraceptive use; wantanotherchild - whether they want another child or not; v313 - whether they use contraception or not; currentchildren - how many children they have; v106 - education level. I chose these variables because they are related: they all deal with contraceptive use and preferences on having children.

#Principal Components #For this homework, you are to use the technique of Principal Components Analysis (PCA) to perform a variable reduction of at least 5 variables.

library(FactoMineR)
## Warning: package 'FactoMineR' was built under R version 4.1.3
# Principal components analysis on the five inputs.
# scale.unit = TRUE has PCA() standardise the columns to unit variance and
# graph = FALSE suppresses the default plots. TRUE/FALSE are spelled out
# because T and F are ordinary variables that can be reassigned.
g16.pc <- PCA(ug2016[, 1:5], scale.unit = TRUE, graph = FALSE)

# Dispatch through the summary() generic instead of calling the S3 method
# summary.PCA() by name.
summary(g16.pc)
## 
## Call:
## PCA(X = ug2016[, c(1:5)], scale.unit = T, graph = F) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5
## Variance               1.655   1.041   0.958   0.800   0.546
## % of var.             33.103  20.814  19.164  16.002  10.916
## Cumulative % of var.  33.103  53.918  73.081  89.084 100.000
## 
## Individuals (the 10 first)
##                      Dist    Dim.1    ctr   cos2    Dim.2    ctr   cos2  
## 1                |  1.805 |  0.136  0.000  0.006 | -1.224  0.034  0.460 |
## 2                |  3.278 |  2.506  0.089  0.584 | -1.494  0.050  0.208 |
## 3                |  1.927 | -0.692  0.007  0.129 | -1.264  0.036  0.430 |
## 4                |  5.835 |  1.565  0.035  0.072 |  4.397  0.435  0.568 |
## 5                |  2.267 |  0.930  0.012  0.168 |  0.062  0.000  0.001 |
## 6                |  2.095 | -0.017  0.000  0.000 | -1.290  0.037  0.379 |
## 7                |  1.927 | -0.692  0.007  0.129 | -1.264  0.036  0.430 |
## 8                |  2.288 |  0.323  0.001  0.020 | -1.363  0.042  0.355 |
## 9                |  1.871 |  1.758  0.044  0.884 |  0.102  0.000  0.003 |
## 10               |  1.712 | -0.430  0.003  0.063 |  0.351  0.003  0.042 |
##                   Dim.3    ctr   cos2  
## 1                -0.708  0.012  0.154 |
## 2                -1.033  0.026  0.099 |
## 3                -0.877  0.019  0.207 |
## 4                -3.476  0.295  0.355 |
## 5                 0.447  0.005  0.039 |
## 6                -0.942  0.022  0.202 |
## 7                -0.877  0.019  0.207 |
## 8                -1.007  0.025  0.194 |
## 9                 0.616  0.009  0.109 |
## 10                0.708  0.012  0.171 |
## 
## Variables
##                     Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3    ctr
## v632             |  0.158  1.515  0.025 |  0.703 47.522  0.495 |  0.685 48.990
## wantanotherchild |  0.750 34.021  0.563 | -0.016  0.026  0.000 |  0.051  0.270
## v313             | -0.032  0.061  0.001 | -0.726 50.648  0.527 |  0.686 49.071
## currentchildren  | -0.804 39.056  0.646 |  0.136  1.766  0.018 |  0.117  1.434
## v106             |  0.648 25.347  0.420 | -0.020  0.039  0.000 | -0.047  0.234
##                    cos2  
## v632              0.469 |
## wantanotherchild  0.003 |
## v313              0.470 |
## currentchildren   0.014 |
## v106              0.002 |

#If you have an idea for latent construct, state what you believe this is.

#Empowerment proxy variable #The latent variable in this case is a combination of variables that affect contraceptive use: fertility preferences, contraceptive knowledge, and level of education (research has shown that education affects fertility preferences).

#The first two eigenvalues are above 1, showing that these variables are related and that the PCA is ‘somewhat’ useful in this situation. However, the variables themselves pull the outcome in different directions, so it is not really a good choice for the construction of a latent variable.

#Ultimately, wantanotherchild and v106 (education level) both have similarly sized positive loadings on the first component.

#The variance is relatively spread out across the components, but the first component accounts for over 30% of the variation.

#Report the summary statistics and correlation matrix for your data

#Report the results of the PCA, being sure to include the eigenvalues and

#summary statistics

# Eigenvalue table from the PCA: one row per component, with columns for the
# eigenvalue, the percentage of variance and the cumulative percentage.
eigenvalues <- g16.pc$eig

# Take the eigenvalue column explicitly. Linear indexing (eigenvalues[1:6])
# walks down the first column of the matrix and then spills into the second,
# so it reported the Dim.1 percentage of variance (33.10) as if it were a
# sixth eigenvalue.
eigenvalues[, 1]
# Prints the five eigenvalues only:
# 1.6551683, 1.0407132, 0.9581893, 0.8001228, 0.5458065

# Variable-level results: coordinates, correlations, cos2 and contributions.
g16.pc$var
## $coord
##                        Dim.1       Dim.2       Dim.3       Dim.4       Dim.5
## v632              0.15833661  0.70325244  0.68514299  0.02342515 -0.10196010
## wantanotherchild  0.75040671 -0.01646499  0.05090487 -0.48747259  0.44316796
## v313             -0.03176579 -0.72601436  0.68570904  0.03952488 -0.01161782
## currentchildren  -0.80401781  0.13555022  0.11721917  0.14043541  0.54928959
## v106              0.64771434 -0.02018295 -0.04732671  0.73529599  0.19276611
## 
## $cor
##                        Dim.1       Dim.2       Dim.3       Dim.4       Dim.5
## v632              0.15833661  0.70325244  0.68514299  0.02342515 -0.10196010
## wantanotherchild  0.75040671 -0.01646499  0.05090487 -0.48747259  0.44316796
## v313             -0.03176579 -0.72601436  0.68570904  0.03952488 -0.01161782
## currentchildren  -0.80401781  0.13555022  0.11721917  0.14043541  0.54928959
## v106              0.64771434 -0.02018295 -0.04732671  0.73529599  0.19276611
## 
## $cos2
##                        Dim.1        Dim.2       Dim.3        Dim.4        Dim.5
## v632             0.025070481 0.4945639989 0.469420920 0.0005487377 0.0103958618
## wantanotherchild 0.563110229 0.0002710959 0.002591306 0.2376295296 0.1963978394
## v313             0.001009065 0.5270968534 0.470196891 0.0015622162 0.0001349738
## currentchildren  0.646444641 0.0183738633 0.013740334 0.0197221044 0.3017190573
## v106             0.419533864 0.0004073516 0.002239817 0.5406601935 0.0371587736
## 
## $contrib
##                       Dim.1       Dim.2      Dim.3       Dim.4       Dim.5
## v632              1.5146787 47.52164347 48.9904172  0.06858168  1.90467898
## wantanotherchild 34.0213280  0.02604905  0.2704378 29.69913307 35.98305211
## v313              0.0609645 50.64765894 49.0714003  0.19524706  0.02472924
## currentchildren  39.0561279  1.76550696  1.4339896  2.46488475 55.27949081
## v106             25.3469009  0.03914159  0.2337552 67.57215344  6.80804886

#If deemed appropriate, conduct some testing of your index/components/latent variables.

#I am not going to run any tests, since the components do not all yield eigenvalues over 1 (only the first two do), meaning that the variables do not necessarily combine into a single latent variable.