PERM, the employer-sponsored green card labor certification program, is the process for obtaining labor certification — the first step of the green card process for foreign nationals seeking permanent residence through their employment. The U.S. employer requests a permanent labor certification by completing an Application for Permanent Employment Certification (“ETA Form 9089”).
The processing time for PERM LC filings has varied historically. As of this writing, if the PERM filing is not audited by the DOL and is submitted online, the decision (approval or denial) is generally received within six months from filing. Paper PERM filings can take significantly longer.
library(data.table)
library(psych)
library(ggplot2)
# load data
# Read the PERM disclosure CSV with strings kept as character (not factor),
# then convert it to a data.table so the DT[condition] row filters used later
# in the script work. `FALSE` is spelled out because `F` is an ordinary
# variable that can be reassigned.
data <- read.csv("./source_lib/perm/PERM_Disclosure_Data_FY17.csv", stringsAsFactors = FALSE)
data <- as.data.table(data)
# Report the number of rows and columns that were loaded.
dim(data)
Does a longer history of the employer’s business establishment lead to a shorter processing time?
Does a lower number of employees in the company put the applicant at higher risk of an adverse PERM decision?
Are employees with a master’s degree or above more likely to belong to a higher level of the prevailing wage determination?
Is a job that requires more months of experience more likely to lead to a positive PERM decision?
# Keep only the columns that the four questions rely on
data <- data[, c(
  "CASE_NUMBER",
  "CASE_STATUS",
  "DECISION_DATE",
  "CASE_RECEIVED_DATE",
  "EMPLOYER_YR_ESTAB",
  "EMPLOYER_NUM_EMPLOYEES",
  "JOB_INFO_EXPERIENCE_NUM_MONTHS",
  "FW_INFO_REQ_EXPERIENCE",
  "PW_LEVEL_9089",
  "PW_AMOUNT_9089",
  "PW_UNIT_OF_PAY_9089",
  "FOREIGN_WORKER_INFO_EDUCATION"
), with = FALSE]
# Discard every row that has an NA in any of the retained columns
data <- na.omit(data)
# * processing.time (days)
# Convert both date columns to Date, then store the gap between receipt and
# decision as a plain numeric count of days.
data[, DECISION_DATE := as.Date(DECISION_DATE)]
data[, CASE_RECEIVED_DATE := as.Date(CASE_RECEIVED_DATE)]
data[, processing.time := as.numeric(DECISION_DATE - CASE_RECEIVED_DATE)]
# * length of business (years)
# EMPLOYER_YR_ESTAB is the year the employer commenced business or incorporated.
# A private household employing a household domestic worker may skip the question.
# Rows without a positive year get 0; every other row gets the years elapsed up to 2018.
data[, establishment := 0]
data[EMPLOYER_YR_ESTAB > 0, establishment := 2018 - EMPLOYER_YR_ESTAB]
# create pay variable for yearly calculation
# Start by counting how often each pay unit occurs, to see which units need
# converting to a yearly amount.
table(data$PW_UNIT_OF_PAY_9089)
##
## Hour Month Week Year
## 1 515 8 4 54649
# Annualise the prevailing wage. Amounts are taken as-is unless the unit is one
# of the four handled below, in which case they are scaled to a yearly figure
# (40 hours/week and 52 weeks/year for hourly pay).
data[, pay := as.numeric(PW_AMOUNT_9089)]
data[PW_UNIT_OF_PAY_9089 == "Hour", pay := PW_AMOUNT_9089 * 40 * 52]
data[PW_UNIT_OF_PAY_9089 == "Month", pay := PW_AMOUNT_9089 * 12]
data[PW_UNIT_OF_PAY_9089 == "Week", pay := PW_AMOUNT_9089 * 52]
data[PW_UNIT_OF_PAY_9089 == "Bi-Weekly", pay := PW_AMOUNT_9089 * 26]
# Expose the employee count under a shorter analysis name
data[, number.of.employees := EMPLOYER_NUM_EMPLOYEES]
# Months of job experience count only when FW_INFO_REQ_EXPERIENCE is "Y"
# (the candidate meets the required experience); every other row gets 0.
data[, work.experience := 0]
data[FW_INFO_REQ_EXPERIENCE == "Y", work.experience := JOB_INFO_EXPERIENCE_NUM_MONTHS]
# edit the education part: split the categories into below / at-or-above a master's degree
# Count the raw education values first to see which categories exist.
table(data$FOREIGN_WORKER_INFO_EDUCATION)
##
## Associate's Bachelor's Doctorate High School Master's
## 3 496 24152 1324 1368 22690
## None Other
## 3829 1315
# edu flags whether the worker holds a master's degree or higher:
#   "y" for Doctorate / Master's,
#   "n" for Bachelor's / Associate's / High School,
#   NA  for every other education value.
data[, edu := NA_character_]
data[FOREIGN_WORKER_INFO_EDUCATION %in% c("Doctorate", "Master's"), edu := "y"]
data[FOREIGN_WORKER_INFO_EDUCATION %in% c("Bachelor's", "Associate's", "High School"), edu := "n"]
# Keep the outcome column plus the derived analysis variables
test.data <- data[, c(
  "CASE_STATUS",
  "processing.time",
  "establishment",
  "pay",
  "number.of.employees",
  "work.experience",
  "edu"
), with = FALSE]
# EMPLOYER_YR_ESTAB is the year the employer commenced business or incorporated.
# A private household employing a household domestic worker may skip the question.
# Implausible entries show up as a business age above 1000 years; we treat those
# rows as outliers and remove them.
#
# The rows are filtered out rather than assigned 0: `DT[cond] <- 0` without a
# column selector overwrites EVERY column of the matching rows, which would
# inject zeros into processing.time, pay, etc. and corrupt CASE_STATUS / edu.
# Filtering also returns a new table, so test.data itself is left untouched.
# Re-knit after this change so the printed summaries reflect the filtered data.
test.data.q1 <- test.data[establishment <= 1000]
describe(test.data.q1)
## vars n mean sd median trimmed mad
## CASE_STATUS* 1 55177 0.00 0.00 0 0.00 0.00
## processing.time 2 55177 138.17 289.74 76 84.12 20.76
## establishment 3 55177 27.14 28.52 20 21.53 11.86
## pay 4 55177 94401.10 594108.98 90813 91454.06 26581.54
## number.of.employees 5 55177 25378.88 64330.98 1163 10902.90 1715.37
## work.experience 6 55177 16.92 22.47 6 13.08 8.90
## edu* 7 50034 0.00 0.00 0 0.00 0.00
## min max range skew kurtosis se
## CASE_STATUS* 0 0 0 NaN NaN 0.00
## processing.time 0 3509 3509 6.02 37.68 1.23
## establishment 0 824 824 4.28 43.92 0.12
## pay 0 138921120 138921120 231.34 54032.83 2529.22
## number.of.employees 0 3414000 3414000 14.49 622.74 273.87
## work.experience 0 240 240 1.49 2.26 0.10
## edu* 0 0 0 NaN NaN 0.00
# Five-number summary plus mean of processing time (days) for the question-1 data
summary(test.data.q1$processing.time)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 64.0 76.0 138.2 97.0 3509.0
# Five-number summary plus mean of business age (years) for the question-1 data
summary(test.data.q1$establishment)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 12.00 20.00 27.14 30.00 824.00
# Dot plot of business age, one dot bin per year
ggplot(test.data.q1) +
  geom_dotplot(aes(x = establishment), binwidth = 1) +
  labs(title = "number of years since business was established")
# Normal Q-Q plot of business age with its reference line
qqnorm(test.data.q1[["establishment"]])
qqline(test.data.q1[["establishment"]])
# Question 2: split the analysis data by the edu flag
# ("y" = master's or above, "n" = below); rows with edu NA fall in neither group.
test.data.q2 <- test.data
test.data.q2.y <- subset(test.data.q2, edu == "y")
test.data.q2.n <- subset(test.data.q2, edu == "n")
# Descriptive statistics for the full (unsplit) table
describe(test.data.q2)
## vars n mean sd median trimmed mad
## CASE_STATUS* 1 55177 NaN NA NA NaN NA
## processing.time 2 55177 138.23 289.87 76 84.12 20.76
## establishment 3 55177 27.48 38.09 20 21.54 11.86
## pay 4 55177 94412.68 594108.17 90813 91458.26 26581.54
## number.of.employees 5 55177 25378.88 64330.98 1163 10902.91 1715.37
## work.experience 6 55177 16.93 22.47 6 13.08 8.90
## edu* 7 50030 NaN NA NA NaN NA
## min max range skew kurtosis se
## CASE_STATUS* Inf -Inf -Inf NA NA NA
## processing.time 0 3509 3509 6.02 37.66 1.23
## establishment 0 2017 2017 23.96 1140.07 0.16
## pay 15 138921120 138921105 231.35 54033.10 2529.22
## number.of.employees 0 3414000 3414000 14.49 622.74 273.87
## work.experience 0 240 240 1.49 2.26 0.10
## edu* Inf -Inf -Inf NA NA NA
# Descriptive statistics for the master's-or-above group (edu == "y")
describe(test.data.q2.y)
## vars n mean sd median trimmed mad
## CASE_STATUS* 1 24014 NaN NA NA NaN NA
## processing.time 2 24014 111.69 219.01 75 78.03 19.27
## establishment 3 24014 29.21 33.57 20 22.42 11.86
## pay 4 24014 97113.69 26486.67 95846 95965.75 24176.76
## number.of.employees 5 24014 31232.82 78143.77 2192 13503.72 3232.07
## work.experience 6 24014 11.55 18.18 0 7.47 0.00
## edu* 7 24014 NaN NA NA NaN NA
## min max range skew kurtosis se
## CASE_STATUS* Inf -Inf -Inf NA NA NA
## processing.time 0 3501 3501 8.07 70.27 1.41
## establishment 0 2008 2008 10.97 510.08 0.22
## pay 17080 233355 216275 0.65 1.73 170.92
## number.of.employees 0 3414000 3414000 16.06 617.02 504.27
## work.experience 0 180 180 2.10 5.24 0.12
## edu* Inf -Inf -Inf NA NA NA
# Descriptive statistics for the below-master's group (edu == "n")
describe(test.data.q2.n)
## vars n mean sd median trimmed mad
## CASE_STATUS* 1 26016 NaN NA NA NaN NA
## processing.time 2 26016 141.49 297.33 76 83.52 20.76
## establishment 3 26016 26.77 37.80 21 21.65 11.86
## pay 4 26016 98156.63 862387.74 90646 92421.93 26713.49
## number.of.employees 5 26016 23441.33 53519.10 1363 10931.45 2013.37
## work.experience 6 26016 21.64 25.59 12 18.84 17.79
## edu* 7 26016 NaN NA NA NaN NA
## min max range skew kurtosis se
## CASE_STATUS* Inf -Inf -Inf NA NA NA
## processing.time 0 3509 3509 5.59 31.69 1.84
## establishment 1 2014 2013 27.43 1315.76 0.23
## pay 15 138921120 138921105 160.36 25807.55 5346.66
## number.of.employees 0 2308000 2308000 6.53 146.20 331.81
## work.experience 0 240 240 0.99 0.35 0.16
## edu* Inf -Inf -Inf NA NA NA
# Yearly pay by education group, drawn as points on flipped axes;
# only rows with a known edu flag ("y" or "n") are plotted.
ggplot(test.data.q2[edu %in% c("y", "n")], aes(x = edu, y = pay)) +
  geom_point() +
  scale_y_continuous() +
  coord_flip()
# Frequency of each distinct pay value within each education group
masters.pay <- table(test.data.q2.y[["pay"]])
non.masters.pay <- table(test.data.q2.n[["pay"]])
barplot(masters.pay)
barplot(non.masters.pay)
# Question 3: separate certified and denied cases
test.data.certified <- subset(test.data, CASE_STATUS == "Certified")
test.data.denied <- subset(test.data, CASE_STATUS == "Denied")
# Overall distribution of employer size before comparing the two outcomes
describe(test.data[["number.of.employees"]])
## vars n mean sd median trimmed mad min max
## X1 1 55177 25378.88 64330.98 1163 10902.91 1715.37 0 3414000
## range skew kurtosis se
## X1 3414000 14.49 622.74 273.87
# Descriptive statistics for cases whose CASE_STATUS is "Certified"
describe(test.data.certified)
## vars n mean sd median trimmed mad
## CASE_STATUS* 1 24872 NaN NA NA NaN NA
## processing.time 2 24872 93.94 82.94 78 82.03 22.24
## establishment 3 24872 27.28 41.43 20 21.48 11.86
## pay 4 24872 93300.42 67111.88 91395 92793.74 27168.64
## number.of.employees 5 24872 29256.74 75866.18 1767 13083.20 2610.86
## work.experience 6 24872 16.33 22.28 6 12.43 8.90
## edu* 7 22788 NaN NA NA NaN NA
## min max range skew kurtosis se
## CASE_STATUS* Inf -Inf -Inf NA NA NA
## processing.time 37 3305 3268 17.21 435.23 0.53
## establishment 1 2014 2013 27.11 1224.03 0.26
## pay 16931 9493100 9476169 110.49 15469.87 425.54
## number.of.employees 0 3414000 3414000 17.08 673.81 481.05
## work.experience 0 240 240 1.47 1.98 0.14
## edu* Inf -Inf -Inf NA NA NA
# Descriptive statistics for cases whose CASE_STATUS is "Denied"
describe(test.data.denied)
## vars n mean sd median trimmed mad
## CASE_STATUS* 1 2814 NaN NA NA NaN NA
## processing.time 2 2814 770.06 834.50 263.0 664.17 243.15
## establishment 3 2814 23.82 35.64 15.0 17.99 10.38
## pay 4 2814 79006.92 150221.51 70657.8 72138.72 43327.50
## number.of.employees 5 2814 6238.88 30966.06 40.0 159.53 51.89
## work.experience 6 2814 26.27 25.46 24.0 23.58 26.69
## edu* 7 2081 NaN NA NA NaN NA
## min max range skew kurtosis se
## CASE_STATUS* Inf -Inf -Inf NA NA NA
## processing.time 36 3509 3473 0.95 -0.67 15.73
## establishment 0 1048 1048 13.49 325.56 0.67
## pay 15 7149000 7148985 39.71 1791.72 2831.85
## number.of.employees 0 384000 384000 7.37 62.99 583.75
## work.experience 0 240 240 1.69 5.98 0.48
## edu* Inf -Inf -Inf NA NA NA
# Employer size summarised separately for each CASE_STATUS value.
# mat=TRUE asks psych::describeBy for one combined table instead of a list per group.
describeBy(test.data$number.of.employees,
group = test.data$CASE_STATUS, mat=TRUE)
## item group1 vars n mean sd median trimmed
## X11 1 Certified 1 24872 29256.741 75866.18 1767 13083.2012
## X12 2 Certified-Expired 1 25852 24349.801 54341.76 1597 10915.5025
## X13 3 Denied 1 2814 6238.875 30966.06 40 159.5306
## X14 4 Withdrawn 1 1639 15625.270 52824.65 140 3048.7814
## mad min max range skew kurtosis se
## X11 2610.859 0 3414000 3414000 17.078411 673.80706 481.0530
## X12 2357.334 0 2308000 2308000 5.846653 131.10859 337.9766
## X13 51.891 0 384000 384000 7.368527 62.98680 583.7459
## X14 200.151 1 725000 724999 5.648571 41.60154 1304.8097
# Histogram of employer size (default binning)
ggplot(test.data) +
  geom_histogram(aes(x = number.of.employees))
# Single box plot of employer size; the dummy x factor gets no label or tick marks
ggplot(test.data, aes(x = factor(0), y = number.of.employees)) +
  geom_boxplot() +
  xlab(NULL) +
  scale_x_discrete(breaks = NULL)
# Five-number summary plus mean of employer size
summary(test.data[["number.of.employees"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 52 1163 25379 23000 3414000
# Prerequisite: work.experience is 0 when the candidate has not met the required
# experience, so question 4 keeps only rows with a positive number of months.
test.data4 <- subset(test.data, work.experience > 0)
describe(test.data4[["work.experience"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 29869 31.27 22.01 24 29.68 17.79 1 240 239 1.07 1.74
## se
## X1 0.13
# Question 4: separate certified and denied cases among rows with positive experience
test.data4.certified <- subset(test.data4, CASE_STATUS == "Certified")
test.data4.denied <- subset(test.data4, CASE_STATUS == "Denied")
# Descriptive statistics for the certified subset
describe(test.data4.certified)
## vars n mean sd median trimmed mad
## CASE_STATUS* 1 12876 NaN NA NA NaN NA
## processing.time 2 12876 97.72 100.04 78 83.02 20.76
## establishment 3 12876 23.97 49.14 19 18.74 10.38
## pay 4 12876 86146.54 88853.82 86216 84842.17 26397.69
## number.of.employees 5 12876 10649.91 23931.76 270 6274.01 394.37
## work.experience 6 12876 31.54 21.89 24 30.14 17.79
## edu* 7 11298 NaN NA NA NaN NA
## min max range skew kurtosis se
## CASE_STATUS* Inf -Inf -Inf NA NA NA
## processing.time 37 3305 3268 16.06 350.97 0.88
## establishment 1 2014 2013 30.80 1200.62 0.43
## pay 16931 9493100 9476169 92.17 9753.93 783.04
## number.of.employees 0 725000 725000 7.65 147.06 210.90
## work.experience 1 240 239 0.97 1.23 0.19
## edu* Inf -Inf -Inf NA NA NA
# Descriptive statistics for denied cases among rows with positive work experience
describe(test.data4.denied)
## vars n mean sd median trimmed mad
## CASE_STATUS* 1 2258 NaN NA NA NaN NA
## processing.time 2 2258 717.92 820.06 249.5 598.83 199.41
## establishment 3 2258 21.68 35.71 14.0 16.55 10.38
## pay 4 2258 76607.38 166903.55 62462.5 68097.74 42032.45
## number.of.employees 5 2258 1879.56 15215.51 28.0 65.95 35.58
## work.experience 6 2258 32.74 24.41 24.0 30.63 17.79
## edu* 7 1602 NaN NA NA NaN NA
## min max range skew kurtosis se
## CASE_STATUS* Inf -Inf -Inf NA NA NA
## processing.time 36 3509 3473 1.12 -0.35 17.26
## establishment 0 1048 1048 16.20 404.68 0.75
## pay 15 7149000 7148985 36.12 1466.86 3512.40
## number.of.employees 0 302000 302000 14.31 238.56 320.20
## work.experience 1 240 239 1.88 7.50 0.51
## edu* Inf -Inf -Inf NA NA NA
# Months of work experience summarised separately for each CASE_STATUS value.
# mat=TRUE asks psych::describeBy for one combined table instead of a list per group.
describeBy(test.data4$work.experience,
group = test.data4$CASE_STATUS, mat=TRUE)
## item group1 vars n mean sd median trimmed
## X11 1 Certified 1 12876 31.54240 21.88582 24 30.14114
## X12 2 Certified-Expired 1 13726 30.83622 21.70622 24 29.18758
## X13 3 Denied 1 2258 32.74181 24.40640 24 30.63108
## X14 4 Withdrawn 1 1009 30.27354 21.92010 24 28.42769
## mad min max range skew kurtosis se
## X11 17.7912 1 240 239 0.9663388 1.226462 0.1928734
## X12 17.7912 1 180 179 0.9600357 0.513925 0.1852730
## X13 17.7912 1 240 239 1.8799792 7.496963 0.5136198
## X14 17.7912 1 180 179 1.3043414 2.868141 0.6900759
# Histogram of months of work experience in 12-month bins, x axis limited to 0-120
ggplot(test.data4, aes(x = work.experience)) +
  geom_histogram(binwidth = 12) +
  xlim(0, 120)