#load necessary packages
library(tidyverse)
library(readxl)
#import datasets
load("data/NLSY_0612.RData")
load("data/NLSY_8694.RData")
load("data/NLSY_9606.RData")
unemployment_data <- read_excel("data/Unemployment.xlsx")
#Question(s) # a. How many observations do they each have? Do they have the same variables as one # another? NLSY_0612.RData has 50744 NLSY_8694.RData has 63430 NLSY_9606.RData has 76116 Unemployment.xlsx” has 51
#Combine All NLSY Datasets into One
NLSY_combined <- bind_rows(NLSY_0612, NLSY_9606, NLSY_8694)
NLSY_cleaned <- NLSY_combined %>%
distinct(id, year, .keep_all = TRUE) # Removes duplicates based on (id, year)
NLSY_cleaned <- distinct(NLSY_combined)
# Convert unemployment_data to long format
unemployment_long <- unemployment_data %>%
pivot_longer(
cols = starts_with("y"), # Selects columns that start with "y"
names_to = "year", # Creates a new column named "year"
values_to = "unemployment_rate" # Stores values in this new column
) %>%
mutate(year = as.numeric(sub("y", "", year))) # Remove "y" prefix and convert to numeric
NLSY_final <- left_join(NLSY_cleaned, unemployment_long, by = "year")
## Warning in left_join(NLSY_cleaned, unemployment_long, by = "year"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 27 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
head(NLSY_final)
## # A tibble: 6 × 24
## id year afqt age amount1 amount2 black educ female formerlymarried
## <dbl> <dbl> <dbl> <hvn_lbl> <dbl> <dbl> <dbl> <hvn> <dbl> <dbl>
## 1 1 2006 NA NA NA NA 0 NA 1 NA
## 2 1 2006 NA NA NA NA 0 NA 1 NA
## 3 1 2006 NA NA NA NA 0 NA 1 NA
## 4 1 2006 NA NA NA NA 0 NA 1 NA
## 5 1 2006 NA NA NA NA 0 NA 1 NA
## 6 1 2006 NA NA NA NA 0 NA 1 NA
## # ℹ 14 more variables: height1985 <hvn_lbll>, hispanic <dbl>, hours <hvn_lbll>,
## # income <hvn_lbll>, married <dbl>, plan <dbl>, risk1 <dbl>, risk2 <dbl>,
## # weight <hvn_lbll>, state_fips2006 <dbl>, height <dbl>, Fips <chr>,
## # Area <chr>, unemployment_rate <dbl>
table(NLSY_final$year, useNA = "ifany") # Check if there are missing values
##
## 1986 1988 1990 1992 1994 1996 1998 2000 2002 2004 2006
## 646986 646986 646986 646986 646986 646986 646986 646986 646986 646986 646986
## 2008 2010 2012
## 646986 646986 646986
Question(s) # b. How many observations were there before deduplicating and after deduplicating? before was 190290 obs and after is 177604 obs for NLSY dataset
unemployment_data <- read_excel("data/Unemployment.xlsx")
#Question(s) # c. What is the average state unemployment rate in 1996?
# Filter for the year 1996
unemployment_1996 <- unemployment_data %>%
select(Fips, Area, y1996) %>% # Select relevant columns
rename(unemployment_rate_1996 = y1996) # Rename column for clarity
# View the data for 1996
head(unemployment_1996)
## # A tibble: 6 × 3
## Fips Area unemployment_rate_1996
## <chr> <chr> <dbl>
## 1 01000 Alabama 5.2
## 2 02000 Alaska 7.6
## 3 04000 Arizona 5.6
## 4 05000 Arkansas 5.3
## 5 06000 California 7.3
## 6 08000 Colorado 4.2
# Calculate the average unemployment rate in 1996
average_unemployment_1996 <- mean(unemployment_1996$unemployment_rate_1996, na.rm = TRUE)
# Print the result
average_unemployment_1996
## [1] 5.147059
# Convert FIPS codes in unemployment data to numeric
unemployment_data <- unemployment_data %>%
mutate(Fips = as.numeric(substr(Fips, 1, 5))) # Extract the first 5 characters and convert to numeric
# Check the structure to confirm changes
str(unemployment_data)
## tibble [51 × 41] (S3: tbl_df/tbl/data.frame)
## $ Fips : num [1:51] 1000 2000 4000 5000 6000 8000 9000 10000 11000 12000 ...
## $ Area : chr [1:51] "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ y1980: num [1:51] 8.9 9.6 6.6 7.6 6.8 5.8 5.8 7.6 7.4 6.1 ...
## $ y1981: num [1:51] 10.6 9.4 6.2 8.7 7.4 5.7 6.2 7.8 8.9 6.7 ...
## $ y1982: num [1:51] 14.1 9.9 10.1 9.9 10 7.3 6.9 8.4 10.4 8.4 ...
## $ y1983: num [1:51] 13.8 9.9 8.8 9.9 9.8 7.1 6 7.7 11.2 8.5 ...
## $ y1984: num [1:51] 11 9.8 5.2 8.7 7.8 5.4 4.7 6.3 9.1 6.4 ...
## $ y1985: num [1:51] 9.2 9.7 6.3 8.7 7.2 6.1 4.7 5.1 8.1 6 ...
## $ y1986: num [1:51] 9.7 10.9 6.9 8.6 6.7 7.5 3.8 4.3 7.5 5.8 ...
## $ y1987: num [1:51] 8.1 10.3 6.5 8.1 5.8 7.5 3.4 3.1 6.3 5.3 ...
## $ y1988: num [1:51] 7.2 8.8 6.4 7.6 5.3 6.5 3 3.1 5 5.1 ...
## $ y1989: num [1:51] 7 7 5.3 7 5.1 5.7 3.8 3.5 4.9 5.6 ...
## $ y1990: num [1:51] 6.8 7.2 5.3 6.9 5.8 5.2 5.1 4.7 6.3 6.1 ...
## $ y1991: num [1:51] 7.3 8.5 5.9 7.4 7.7 5.3 6.5 6.2 7.8 7.5 ...
## $ y1992: num [1:51] 7.6 8.9 7.5 7.1 9.3 6 7.6 5.4 8.6 8.2 ...
## $ y1993: num [1:51] 7.3 7.7 6.4 6.1 9.5 5.3 6.6 5.1 8.5 7.1 ...
## $ y1994: num [1:51] 6.2 7.6 6.1 5.4 8.6 4.2 5.6 4.8 8 6.4 ...
## $ y1995: num [1:51] 6 7.3 5.3 4.8 7.9 4 5.5 4.3 8.8 5.5 ...
## $ y1996: num [1:51] 5.2 7.6 5.6 5.3 7.3 4.2 5.5 5.1 8.4 5.2 ...
## $ y1997: num [1:51] 5 7.1 4.6 5.2 6.4 3.5 5 4 8 4.8 ...
## $ y1998: num [1:51] 4.4 6.3 4.3 5.2 5.9 3.6 3.4 3.7 8.4 4.3 ...
## $ y1999: num [1:51] 4.7 6.5 4.4 4.6 5.2 3.1 2.9 3.4 6.4 3.9 ...
## $ y2000: num [1:51] 4.6 6.4 4 4.3 4.9 2.8 2.4 3.7 5.6 3.7 ...
## $ y2001: num [1:51] 5.1 6.4 4.8 5 5.4 3.8 3.1 3.5 6.3 4.7 ...
## $ y2002: num [1:51] 5.9 7.3 6.1 5.5 6.7 5.5 4.3 4 6.4 5.6 ...
## $ y2003: num [1:51] 6 7.8 5.7 5.9 6.8 6 5.4 4.3 6.8 5.2 ...
## $ y2004: num [1:51] 5.7 7.5 5 5.7 6.2 5.5 5 4 7.8 4.6 ...
## $ y2005: num [1:51] 4.5 6.9 4.7 5.2 5.4 5 4.9 4.1 6.4 3.7 ...
## $ y2006: num [1:51] 4 6.6 4.2 5.2 4.9 4.3 4.3 3.6 5.8 3.2 ...
## $ y2007: num [1:51] 4 6.3 3.9 5.3 5.4 3.7 4.5 3.4 5.5 4 ...
## $ y2008: num [1:51] 5.7 6.7 6.2 5.5 7.3 4.8 5.7 4.9 6.5 6.3 ...
## $ y2009: num [1:51] 11 7.7 9.9 7.8 11.2 7.3 7.9 8.3 9.3 10.4 ...
## $ y2010: num [1:51] 10.5 7.9 10.4 8.2 12.2 8.7 9.1 8.4 9.4 11.1 ...
## $ y2011: num [1:51] 9.6 7.6 9.5 8.3 11.7 8.4 8.8 7.5 10.2 10 ...
## $ y2012: num [1:51] 8 7.1 8.3 7.6 10.4 7.9 8.3 7.2 9 8.5 ...
## $ y2013: num [1:51] 7.2 7 7.7 7.2 8.9 6.9 7.8 6.7 8.5 7.2 ...
## $ y2014: num [1:51] 6.8 6.9 6.8 6 7.5 5 6.6 5.7 7.8 6.3 ...
## $ y2015: num [1:51] 6.1 6.5 6.1 5 6.2 3.9 5.7 4.9 6.9 5.5 ...
## $ y2016: num [1:51] 5.8 6.9 5.4 4 5.5 3.2 5.1 4.5 6.1 4.8 ...
## $ y2017: num [1:51] 4.4 7 4.9 3.7 4.8 2.7 4.7 4.5 6.1 4.2 ...
## $ y2018: num [1:51] 3.9 6.6 4.8 3.7 4.2 3.3 4.1 3.8 5.6 3.6 ...
# Rename the columns to match in both datasets
NLSY_cleaned <- NLSY_cleaned %>%
rename(Fips = state_fips2006) # Change to the correct column name in the NLSY dataset
# Merge NLSY and unemployment data by FIPS code
NLSY_final <- left_join(NLSY_cleaned, unemployment_data, by = "Fips")
# View the first few rows of the merged dataset
head(NLSY_final)
## # A tibble: 6 × 61
## id year afqt age amount1 amount2 black educ female formerlymarried
## <dbl> <dbl> <dbl> <hvn_lbl> <dbl> <dbl> <dbl> <hvn> <dbl> <dbl>
## 1 1 2006 NA NA NA NA 0 NA 1 NA
## 2 1 2008 NA NA NA NA 0 NA 1 NA
## 3 1 2010 NA NA NA NA 0 NA 1 NA
## 4 1 2012 NA NA NA NA 0 NA 1 NA
## 5 2 2006 6.84 47 0 0 0 12 1 0
## 6 2 2008 6.84 49 0 0 0 12 1 0
## # ℹ 51 more variables: height1985 <hvn_lbll>, hispanic <dbl>, hours <hvn_lbll>,
## # income <hvn_lbll>, married <dbl>, plan <dbl>, risk1 <dbl>, risk2 <dbl>,
## # weight <hvn_lbll>, Fips <dbl>, height <dbl>, Area <chr>, y1980 <dbl>,
## # y1981 <dbl>, y1982 <dbl>, y1983 <dbl>, y1984 <dbl>, y1985 <dbl>,
## # y1986 <dbl>, y1987 <dbl>, y1988 <dbl>, y1989 <dbl>, y1990 <dbl>,
## # y1991 <dbl>, y1992 <dbl>, y1993 <dbl>, y1994 <dbl>, y1995 <dbl>,
## # y1996 <dbl>, y1997 <dbl>, y1998 <dbl>, y1999 <dbl>, y2000 <dbl>, …
# Check for missing FIPS codes
sum(is.na(NLSY_final$unemployment_rate)) # Replace with the actual name of the unemployment rate column
## [1] 0
61 variables
# Convert 'Fips' in the unemployment_long dataset from character to numeric
unemployment_long$Fips <- as.numeric(unemployment_long$Fips)
# Now, join the datasets using the correct 'Fips' column
NLSY_final <- left_join(NLSY_cleaned, unemployment_long, by = c("Fips", "year"))
# Check the result
head(NLSY_final)
## # A tibble: 6 × 23
## id year afqt age amount1 amount2 black educ female formerlymarried
## <dbl> <dbl> <dbl> <hvn_lbl> <dbl> <dbl> <dbl> <hvn> <dbl> <dbl>
## 1 1 2006 NA NA NA NA 0 NA 1 NA
## 2 1 2008 NA NA NA NA 0 NA 1 NA
## 3 1 2010 NA NA NA NA 0 NA 1 NA
## 4 1 2012 NA NA NA NA 0 NA 1 NA
## 5 2 2006 6.84 47 0 0 0 12 1 0
## 6 2 2008 6.84 49 0 0 0 12 1 0
## # ℹ 13 more variables: height1985 <hvn_lbll>, hispanic <dbl>, hours <hvn_lbll>,
## # income <hvn_lbll>, married <dbl>, plan <dbl>, risk1 <dbl>, risk2 <dbl>,
## # weight <hvn_lbll>, Fips <dbl>, height <dbl>, Area <chr>,
## # unemployment_rate <dbl>
NLSY_final <- NLSY_final %>%
mutate(unemployment_rate_year = ifelse(year %in% 1980:2018, unemployment_rate, NA_real_))
# After merging, just use the unemployment_rate already assigned
NLSY_final <- NLSY_final %>%
mutate(unemployment_rate_year = unemployment_rate)
# Drop the extra yearly unemployment rate columns
NLSY_final <- NLSY_final %>%
select(-starts_with("y")) # Remove all columns starting with "y"
# Check the first few rows of the final dataset
head(NLSY_final)
## # A tibble: 6 × 23
## id afqt age amount1 amount2 black educ female formerlymarried
## <dbl> <dbl> <hvn_lbll> <dbl> <dbl> <dbl> <hvn_lbll> <dbl> <dbl>
## 1 1 NA NA NA NA 0 NA 1 NA
## 2 1 NA NA NA NA 0 NA 1 NA
## 3 1 NA NA NA NA 0 NA 1 NA
## 4 1 NA NA NA NA 0 NA 1 NA
## 5 2 6.84 47 0 0 0 12 1 0
## 6 2 6.84 49 0 0 0 12 1 0
## # ℹ 14 more variables: height1985 <hvn_lbll>, hispanic <dbl>, hours <hvn_lbll>,
## # income <hvn_lbll>, married <dbl>, plan <dbl>, risk1 <dbl>, risk2 <dbl>,
## # weight <hvn_lbll>, Fips <dbl>, height <dbl>, Area <chr>,
## # unemployment_rate <dbl>, unemployment_rate_year <dbl>
# Verify that the unemployment rate for each year and FIPS code is correct
summary(NLSY_final$unemployment_rate_year)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## NA NA NA NaN NA NA 177604
# Calculate the minimum unemployment rate
min_unemployment_rate <- min(NLSY_final$unemployment_rate_year, na.rm = TRUE)
# Calculate the maximum unemployment rate
max_unemployment_rate <- max(NLSY_final$unemployment_rate_year, na.rm = TRUE)
# Calculate the average unemployment rate
avg_unemployment_rate <- mean(NLSY_final$unemployment_rate_year, na.rm = TRUE)
# Print the results
min_unemployment_rate
## [1] Inf
max_unemployment_rate
## [1] -Inf
avg_unemployment_rate
## [1] NaN
#6) Using a loop over the fips codes in the previously made dataset (state_fips2006), create a unique dataset for each state. You ca have your loop go over a sequence from 1 (the minimum fips code) to 56 (the maximum fips code). Question(s)
# Create an empty list to store datasets for each FIPS code
state_datasets <- list()
# Loop over each FIPS code (from 1 to 56)
for (fips_code in 1:56) {
# Filter the NLSY dataset for each FIPS code and store it in the list
state_data <- NLSY_final %>% filter(Fips == fips_code)
# Dynamically name the datasets based on the FIPS code
state_datasets[[paste0("state_", fips_code)]] <- state_data
}
# Check the names of the generated datasets
names(state_datasets)
## [1] "state_1" "state_2" "state_3" "state_4" "state_5" "state_6"
## [7] "state_7" "state_8" "state_9" "state_10" "state_11" "state_12"
## [13] "state_13" "state_14" "state_15" "state_16" "state_17" "state_18"
## [19] "state_19" "state_20" "state_21" "state_22" "state_23" "state_24"
## [25] "state_25" "state_26" "state_27" "state_28" "state_29" "state_30"
## [31] "state_31" "state_32" "state_33" "state_34" "state_35" "state_36"
## [37] "state_37" "state_38" "state_39" "state_40" "state_41" "state_42"
## [43] "state_43" "state_44" "state_45" "state_46" "state_47" "state_48"
## [49] "state_49" "state_50" "state_51" "state_52" "state_53" "state_54"
## [55] "state_55" "state_56"
# Access the dataset for a specific state
state_1_data <- state_datasets[["state_1"]]
# View the first few rows of the dataset for state 1
head(state_1_data)
## # A tibble: 6 × 23
## id afqt age amount1 amount2 black educ female formerlymarried
## <dbl> <dbl> <hvn_lbll> <dbl> <dbl> <dbl> <hvn_lbll> <dbl> <dbl>
## 1 75 83.4 44 2000 1100 0 16 1 0
## 2 75 83.4 46 2000 1100 0 16 1 0
## 3 75 83.4 48 2000 1100 0 16 1 0
## 4 75 83.4 51 2000 1100 0 16 1 0
## 5 85 2.43 46 5000 300 1 11 0 0
## 6 85 2.43 48 5000 300 1 11 0 0
## # ℹ 14 more variables: height1985 <hvn_lbll>, hispanic <dbl>, hours <hvn_lbll>,
## # income <hvn_lbll>, married <dbl>, plan <dbl>, risk1 <dbl>, risk2 <dbl>,
## # weight <hvn_lbll>, Fips <dbl>, height <dbl>, Area <chr>,
## # unemployment_rate <dbl>, unemployment_rate_year <dbl>
# Count observations for FIPS code 3
count_fips_3 <- NLSY_final %>% filter(Fips == 3) %>% nrow()
# Count observations for FIPS code 30
count_fips_30 <- NLSY_final %>% filter(Fips == 30) %>% nrow()
# Print the results
count_fips_3
## [1] 0
count_fips_30
## [1] 3486
# Create an empty list to store datasets for each FIPS code
state_datasets <- list()
# Loop over each FIPS code (from 1 to 56)
for (fips_code in 1:56) {
# Filter the NLSY dataset for each FIPS code
state_data <- NLSY_final %>% filter(Fips == fips_code)
# Check if the state data is not empty
if (nrow(state_data) > 0) {
# Store the dataset in the list
state_datasets[[paste0("state_", fips_code)]] <- state_data
# Display summary statistics for the 'afqt' variable
cat("FIPS Code:", fips_code, "\n")
summary_afqt <- summary(state_data$afqt)
print(summary_afqt)
# Additional stats for afqt (optional)
afqt_min <- min(state_data$afqt, na.rm = TRUE)
afqt_max <- max(state_data$afqt, na.rm = TRUE)
afqt_mean <- mean(state_data$afqt, na.rm = TRUE)
afqt_sd <- sd(state_data$afqt, na.rm = TRUE)
cat("Minimum AFQT:", afqt_min, "\n")
cat("Maximum AFQT:", afqt_max, "\n")
cat("Mean AFQT:", afqt_mean, "\n")
cat("Standard Deviation of AFQT:", afqt_sd, "\n\n")
} else {
cat("No data for FIPS Code:", fips_code, "\n\n")
}
}
## FIPS Code: 1
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.187 13.225 39.643 42.154 70.767 98.687 98
## Minimum AFQT: 0.187
## Maximum AFQT: 98.687
## Mean AFQT: 42.15396
## Standard Deviation of AFQT: 30.21364
##
## FIPS Code: 2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 21.05 41.59 44.35 67.25 97.41 280
## Minimum AFQT: 0
## Maximum AFQT: 97.407
## Mean AFQT: 44.35419
## Standard Deviation of AFQT: 26.75407
##
## No data for FIPS Code: 3
##
## FIPS Code: 4
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 13.98 33.38 40.66 67.88 98.48 140
## Minimum AFQT: 0
## Maximum AFQT: 98.483
## Mean AFQT: 40.65865
## Standard Deviation of AFQT: 30.00163
##
## FIPS Code: 5
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.259 14.461 36.821 42.328 67.714 100.000 196
## Minimum AFQT: 0.259
## Maximum AFQT: 100
## Mean AFQT: 42.32787
## Standard Deviation of AFQT: 29.95131
##
## FIPS Code: 6
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.087 19.121 40.312 42.220 63.671 100.000 168
## Minimum AFQT: 0.087
## Maximum AFQT: 100
## Mean AFQT: 42.21978
## Standard Deviation of AFQT: 27.14101
##
## No data for FIPS Code: 7
##
## FIPS Code: 8
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.107 16.831 38.748 43.370 71.692 100.000 308
## Minimum AFQT: 1.107
## Maximum AFQT: 100
## Mean AFQT: 43.3705
## Standard Deviation of AFQT: 29.52152
##
## FIPS Code: 9
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 16.45 35.29 40.49 64.20 99.34 154
## Minimum AFQT: 0
## Maximum AFQT: 99.344
## Mean AFQT: 40.48614
## Standard Deviation of AFQT: 28.03135
##
## FIPS Code: 10
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.116 17.116 36.101 40.754 61.837 100.000 238
## Minimum AFQT: 0.116
## Maximum AFQT: 100
## Mean AFQT: 40.75447
## Standard Deviation of AFQT: 27.9723
##
## FIPS Code: 11
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.258 19.101 40.773 43.647 67.886 98.230 196
## Minimum AFQT: 0.258
## Maximum AFQT: 98.23
## Mean AFQT: 43.64685
## Standard Deviation of AFQT: 27.60704
##
## FIPS Code: 12
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 17.78 41.75 42.74 66.06 99.02 210
## Minimum AFQT: 0
## Maximum AFQT: 99.022
## Mean AFQT: 42.73683
## Standard Deviation of AFQT: 27.61351
##
## FIPS Code: 13
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 12.57 35.07 39.52 63.95 99.91 182
## Minimum AFQT: 0
## Maximum AFQT: 99.914
## Mean AFQT: 39.5249
## Standard Deviation of AFQT: 28.94561
##
## No data for FIPS Code: 14
##
## FIPS Code: 15
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 12.25 32.24 39.27 61.17 99.37 196
## Minimum AFQT: 0
## Maximum AFQT: 99.37
## Mean AFQT: 39.27284
## Standard Deviation of AFQT: 29.38114
##
## FIPS Code: 16
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 15.08 35.55 40.53 61.78 98.09 252
## Minimum AFQT: 0
## Maximum AFQT: 98.092
## Mean AFQT: 40.52729
## Standard Deviation of AFQT: 29.16537
##
## FIPS Code: 17
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.074 17.395 35.799 40.972 64.484 100.000 126
## Minimum AFQT: 0.074
## Maximum AFQT: 100
## Mean AFQT: 40.97183
## Standard Deviation of AFQT: 28.41638
##
## FIPS Code: 18
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 13.34 39.56 42.89 67.08 99.46 294
## Minimum AFQT: 0
## Maximum AFQT: 99.46
## Mean AFQT: 42.89013
## Standard Deviation of AFQT: 29.88843
##
## FIPS Code: 19
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 14.92 32.62 38.93 60.28 99.90 168
## Minimum AFQT: 0
## Maximum AFQT: 99.9
## Mean AFQT: 38.9318
## Standard Deviation of AFQT: 28.15205
##
## FIPS Code: 20
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.107 14.519 38.066 40.710 61.539 99.707 238
## Minimum AFQT: 0.107
## Maximum AFQT: 99.707
## Mean AFQT: 40.71019
## Standard Deviation of AFQT: 27.93543
##
## FIPS Code: 21
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.519 15.112 35.478 39.287 59.202 99.492 154
## Minimum AFQT: 0.519
## Maximum AFQT: 99.492
## Mean AFQT: 39.28716
## Standard Deviation of AFQT: 27.29503
##
## FIPS Code: 22
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 16.29 40.13 43.31 68.20 100.00 224
## Minimum AFQT: 0
## Maximum AFQT: 100
## Mean AFQT: 43.30815
## Standard Deviation of AFQT: 29.45206
##
## FIPS Code: 23
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 16.73 38.82 44.40 70.42 99.38 196
## Minimum AFQT: 0
## Maximum AFQT: 99.384
## Mean AFQT: 44.40158
## Standard Deviation of AFQT: 30.48708
##
## FIPS Code: 24
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.093 17.630 38.403 44.325 75.293 100.000 280
## Minimum AFQT: 0.093
## Maximum AFQT: 100
## Mean AFQT: 44.32483
## Standard Deviation of AFQT: 30.3039
##
## FIPS Code: 25
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 19.01 42.78 43.75 64.36 100.00 210
## Minimum AFQT: 0
## Maximum AFQT: 100
## Mean AFQT: 43.75475
## Standard Deviation of AFQT: 27.82462
##
## FIPS Code: 26
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 18.82 41.46 44.80 69.11 100.00 196
## Minimum AFQT: 0
## Maximum AFQT: 100
## Mean AFQT: 44.80095
## Standard Deviation of AFQT: 29.82112
##
## FIPS Code: 27
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.117 19.337 38.768 42.336 64.846 98.089 224
## Minimum AFQT: 0.117
## Maximum AFQT: 98.089
## Mean AFQT: 42.33572
## Standard Deviation of AFQT: 26.96962
##
## FIPS Code: 28
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.36 18.24 41.19 43.87 68.68 100.00 252
## Minimum AFQT: 0.36
## Maximum AFQT: 100
## Mean AFQT: 43.86872
## Standard Deviation of AFQT: 28.56471
##
## FIPS Code: 29
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.162 12.978 38.179 40.566 62.881 99.523 182
## Minimum AFQT: 0.162
## Maximum AFQT: 99.523
## Mean AFQT: 40.56563
## Standard Deviation of AFQT: 28.50243
##
## FIPS Code: 30
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.069 16.022 36.048 40.543 62.645 99.819 126
## Minimum AFQT: 0.069
## Maximum AFQT: 99.819
## Mean AFQT: 40.54345
## Standard Deviation of AFQT: 28.58826
##
## FIPS Code: 31
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.637 18.917 41.041 43.819 68.247 100.000 280
## Minimum AFQT: 0.637
## Maximum AFQT: 100
## Mean AFQT: 43.81875
## Standard Deviation of AFQT: 28.63352
##
## FIPS Code: 32
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.499 19.035 41.377 44.165 67.619 100.000 126
## Minimum AFQT: 0.499
## Maximum AFQT: 100
## Mean AFQT: 44.16536
## Standard Deviation of AFQT: 28.47262
##
## FIPS Code: 33
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 12.98 35.70 40.19 64.20 100.00 168
## Minimum AFQT: 0
## Maximum AFQT: 100
## Mean AFQT: 40.187
## Standard Deviation of AFQT: 29.30607
##
## FIPS Code: 34
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.226 17.232 38.916 42.421 65.832 98.699 224
## Minimum AFQT: 0.226
## Maximum AFQT: 98.699
## Mean AFQT: 42.42125
## Standard Deviation of AFQT: 28.24989
##
## FIPS Code: 35
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.618 20.097 40.419 43.299 65.800 99.453 238
## Minimum AFQT: 0.618
## Maximum AFQT: 99.453
## Mean AFQT: 43.29938
## Standard Deviation of AFQT: 26.75749
##
## FIPS Code: 36
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 14.60 38.72 41.48 66.91 100.00 336
## Minimum AFQT: 0
## Maximum AFQT: 100
## Mean AFQT: 41.47694
## Standard Deviation of AFQT: 28.67648
##
## FIPS Code: 37
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.094 20.368 38.334 43.027 64.109 98.376 154
## Minimum AFQT: 0.094
## Maximum AFQT: 98.376
## Mean AFQT: 43.02704
## Standard Deviation of AFQT: 27.47461
##
## FIPS Code: 38
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 20.08 37.70 43.83 67.76 99.95 168
## Minimum AFQT: 0
## Maximum AFQT: 99.95
## Mean AFQT: 43.825
## Standard Deviation of AFQT: 28.46742
##
## FIPS Code: 39
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.44 19.81 43.92 46.15 70.36 100.00 238
## Minimum AFQT: 0.44
## Maximum AFQT: 100
## Mean AFQT: 46.1475
## Standard Deviation of AFQT: 28.68517
##
## FIPS Code: 40
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 13.31 36.05 41.64 68.51 100.00 336
## Minimum AFQT: 0
## Maximum AFQT: 100
## Mean AFQT: 41.64131
## Standard Deviation of AFQT: 29.73494
##
## FIPS Code: 41
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 17.33 39.17 43.37 67.25 100.00 252
## Minimum AFQT: 0
## Maximum AFQT: 100
## Mean AFQT: 43.37212
## Standard Deviation of AFQT: 28.87553
##
## FIPS Code: 42
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.179 16.586 39.358 41.688 62.116 100.000 196
## Minimum AFQT: 0.179
## Maximum AFQT: 100
## Mean AFQT: 41.68755
## Standard Deviation of AFQT: 28.46019
##
## No data for FIPS Code: 43
##
## FIPS Code: 44
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.197 13.943 34.850 40.082 62.653 98.312 266
## Minimum AFQT: 0.197
## Maximum AFQT: 98.312
## Mean AFQT: 40.08193
## Standard Deviation of AFQT: 28.45845
##
## FIPS Code: 45
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.331 16.205 37.491 42.881 68.923 100.000 238
## Minimum AFQT: 0.331
## Maximum AFQT: 100
## Mean AFQT: 42.88115
## Standard Deviation of AFQT: 29.98714
##
## FIPS Code: 46
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.27 18.71 42.23 42.83 64.08 98.95 210
## Minimum AFQT: 0.27
## Maximum AFQT: 98.955
## Mean AFQT: 42.83106
## Standard Deviation of AFQT: 27.64589
##
## FIPS Code: 47
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.491 21.326 45.234 47.039 72.963 100.000 238
## Minimum AFQT: 0.491
## Maximum AFQT: 100
## Mean AFQT: 47.03922
## Standard Deviation of AFQT: 30.37621
##
## FIPS Code: 48
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.349 16.707 38.539 42.941 68.830 99.486 196
## Minimum AFQT: 0.349
## Maximum AFQT: 99.486
## Mean AFQT: 42.94067
## Standard Deviation of AFQT: 29.40977
##
## FIPS Code: 49
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 17.50 39.20 42.62 66.90 99.93 210
## Minimum AFQT: 0
## Maximum AFQT: 99.929
## Mean AFQT: 42.62404
## Standard Deviation of AFQT: 29.12959
##
## FIPS Code: 50
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.336 18.852 38.630 42.201 64.230 100.000 98
## Minimum AFQT: 0.336
## Maximum AFQT: 100
## Mean AFQT: 42.20145
## Standard Deviation of AFQT: 28.07051
##
## FIPS Code: 51
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.102 16.822 41.469 44.034 69.959 100.000 168
## Minimum AFQT: 0.102
## Maximum AFQT: 100
## Mean AFQT: 44.03448
## Standard Deviation of AFQT: 29.58854
##
## No data for FIPS Code: 52
##
## FIPS Code: 53
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 17.48 37.15 41.71 61.65 99.40 196
## Minimum AFQT: 0
## Maximum AFQT: 99.403
## Mean AFQT: 41.70553
## Standard Deviation of AFQT: 28.74522
##
## FIPS Code: 54
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.094 17.499 40.716 42.367 62.463 99.528 210
## Minimum AFQT: 0.094
## Maximum AFQT: 99.528
## Mean AFQT: 42.36655
## Standard Deviation of AFQT: 28.01582
##
## FIPS Code: 55
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 14.31 34.31 41.45 67.08 100.00 196
## Minimum AFQT: 0
## Maximum AFQT: 100
## Mean AFQT: 41.44775
## Standard Deviation of AFQT: 30.05459
##
## FIPS Code: 56
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.866 17.463 37.518 43.065 67.610 99.878 378
## Minimum AFQT: 0.866
## Maximum AFQT: 99.878
## Mean AFQT: 43.06532
## Standard Deviation of AFQT: 28.94587
# Check the names of the generated datasets
names(state_datasets)
## [1] "state_1" "state_2" "state_4" "state_5" "state_6" "state_8"
## [7] "state_9" "state_10" "state_11" "state_12" "state_13" "state_15"
## [13] "state_16" "state_17" "state_18" "state_19" "state_20" "state_21"
## [19] "state_22" "state_23" "state_24" "state_25" "state_26" "state_27"
## [25] "state_28" "state_29" "state_30" "state_31" "state_32" "state_33"
## [31] "state_34" "state_35" "state_36" "state_37" "state_38" "state_39"
## [37] "state_40" "state_41" "state_42" "state_44" "state_45" "state_46"
## [43] "state_47" "state_48" "state_49" "state_50" "state_51" "state_53"
## [49] "state_54" "state_55" "state_56"
# Loop through each year in the dataset
years <- unique(NLSY_final$year)
## Warning: Unknown or uninitialised column: `year`.
for (year in years) {
# Filter the dataset for the current year
year_data <- NLSY_final %>% filter(year == year)
# Print the year for which the summary statistics are being calculated
cat("Summary statistics for year:", year, "\n")
# Summary statistics for 'afqt'
cat("Summary statistics for AFQT:\n")
afqt_summary <- summary(year_data$afqt)
print(afqt_summary)
# Additional stats for 'afqt' (optional)
afqt_min <- min(year_data$afqt, na.rm = TRUE)
afqt_max <- max(year_data$afqt, na.rm = TRUE)
afqt_mean <- mean(year_data$afqt, na.rm = TRUE)
afqt_sd <- sd(year_data$afqt, na.rm = TRUE)
cat("Minimum AFQT:", afqt_min, "\n")
cat("Maximum AFQT:", afqt_max, "\n")
cat("Mean AFQT:", afqt_mean, "\n")
cat("Standard Deviation of AFQT:", afqt_sd, "\n")
# Summary statistics for 'unemployment_rate_year'
cat("Summary statistics for Unemployment Rate:\n")
unemployment_summary <- summary(year_data$unemployment_rate_year)
print(unemployment_summary)
}
# Check the column names in the NLSY_final dataset to make sure 'year' is present
colnames(NLSY_final)
## [1] "id" "afqt" "age"
## [4] "amount1" "amount2" "black"
## [7] "educ" "female" "formerlymarried"
## [10] "height1985" "hispanic" "hours"
## [13] "income" "married" "plan"
## [16] "risk1" "risk2" "weight"
## [19] "Fips" "height" "Area"
## [22] "unemployment_rate" "unemployment_rate_year"
# Convert 'unemployment_rate_year' to numeric if it's not already (assuming it's a character or factor)
NLSY_final$unemployment_rate_year <- as.numeric(as.character(NLSY_final$unemployment_rate_year))
# Filter the data for the year 2008 (AFQT)
data_2008 <- NLSY_final %>% filter(unemployment_rate_year == 2008)
# Calculate the median AFQT in 2008
median_afqt_2008 <- median(data_2008$afqt, na.rm = TRUE)
cat("Median AFQT in 2008:", median_afqt_2008, "\n")
## Median AFQT in 2008: NA
# Filter the data for the year 2009 (Unemployment Rate)
data_2009 <- NLSY_final %>% filter(unemployment_rate_year == 2009)
# Calculate the median unemployment rate in 2009
median_unemployment_2009 <- median(data_2009$unemployment_rate, na.rm = TRUE)
cat("Median Unemployment Rate in 2009:", median_unemployment_2009, "\n")
## Median Unemployment Rate in 2009: NA