import os
os.getcwd()
os.chdir('D:\\Git projects\\college_works\\Thesis')

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from math import gamma
import time
import pandas as pd

def par():
    global beta, eta, varphi, theta, rho, i, r, gamma1, phi, kappa, alfa, psi, sig, pi, nu
    beta = 0.69
    eta = 0.25
    varphi = 0.25
    theta = 3.44
    rho = 0.19
    alfa = 0.6
    kappa = np.divide(1, (1- eta) )
    nu = 1 + np.multiply(alfa, np.multiply(varphi, kappa)) - np.divide(kappa, theta)
    pi = 1 - np.multiply(np.multiply( (1 - alfa), varphi), kappa )
    sig = np.divide(np.multiply(eta, kappa), pi )
    #psi = np.multiply(np.power(eta, eta), np.power( (1-eta), (1-eta) ) )
    psi = 1
    i = 7
    r = 27
    gamma1 = gamma(   1 - np.multiply( np.divide(1, np.multiply(theta, (1-rho)) ), np.divide(1, (1 - eta) ) ) )   
    phi = np.array([0.138, 0.174, 0.136, 0.1, 0.051, 0.084, 0.168]).reshape(i, 1)
   


def taus2():
    par()

    # random initial guesses for the frictions and wages, stacked as a (3, i, r) array
    tau_h = np.random.uniform(low=-0.99, high=40, size=(i, r))
    tau_h[0, :] = 0                   # tau_h of occupation 0 normalized to zero

    tau_w = np.random.uniform(low=-0.99, high=0.999, size=(i, r))
    tau_w[0, :] = tau_w[0, 0]         # tau_w of occupation 0 equal across states

    w = np.random.uniform(low=0.001, high=30, size=(i, r))
    w[0, r-1] = 1                     # wage normalization
    #w[1:6, :] = w[0, :]

    x1 = np.array([tau_w, tau_h, w])

    return x1

x1 = taus2()

The optimal schooling time in each occupation is:

\[\begin{equation}\label{eq14} s^* = \left(1 + \frac{1-\eta}{\beta \phi}\right)^{-1} \end{equation}\]

def sf( ):
    global s
    s = np.power( (1+ np.divide( (1-eta), ( np.multiply(beta, phi) ) ) ), -1 )
    s = s.reshape(i, 1)
    return s
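For example, with \(\beta = 0.69\) and \(\phi_1 = 0.138\), the first occupation gets \(s_1 = \left(1 + \frac{0.75}{0.69 \cdot 0.138}\right)^{-1} \approx 0.11\).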

The fraction of teachers, \(p_{tr}\), can be written as:

\[\begin{equation} p_{tr} = \frac{ \left[ \frac{1- \tau^w_{tr}}{(1 + \tau^h_{tr})^\eta} w_{tr} s_t^{\phi_t}(1-s_t)^{\frac{1-\eta}{\beta}} \right]^\theta} {\sum_{j=1}^N \left[ \frac{1 - \tau_{jr}^w}{(1 + \tau_{jr}^h)^\eta} w_{jr} s_j^{\phi_j} (1-s_j)^{\frac{1-\eta}{\beta}} \right]^\theta } \end{equation}\]

def p_trf(x1):    
    s = sf( )    
    A = np.divide( (1 - x1[0]) , np.power( ( 1 + x1[1] ), eta) )     
    b = np.power(s, phi ) 
    B = np.multiply(b.reshape(i,1), x1[2] )     
    C = np.power( (1 - s), np.divide((1-eta), beta) )
    d = np.multiply(np.multiply(A, B), C )
    k = np.power(d, theta)
    p_tr = np.divide( k[i-1], np.sum(k, axis=0) )
    return p_tr
 

The teacher’s human capital is given by:

\[\begin{equation} H_{tr} = p_{tr}^{\frac{\nu}{\pi}} (s_t^{\phi_t} \eta^\eta)^{\frac{\kappa}{\pi}} \left( \frac{1-\tau_{tr}^w}{1+ \tau_{tr}^h} w_{tr}\right)^\sigma \gamma^\frac{1}{\pi} \end{equation}\]

Where \(\kappa = 1/(1 - \eta)\), \(\pi = 1-(1-\alpha)\varphi\kappa\), \(\nu = 1+ \alpha\varphi\kappa - \kappa/\theta\), and \(\sigma = \frac{\eta \kappa}{\pi}\).
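With the calibrated values above (\(\eta = 0.25\), \(\varphi = 0.25\), \(\theta = 3.44\), \(\alpha = 0.6\)), these composites evaluate to roughly \(\kappa \approx 1.33\), \(\pi \approx 0.87\), \(\nu \approx 0.81\) and \(\sigma \approx 0.38\), matching what par() computes.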

  
def H_trf(x1):    
    p_tr = p_trf(x1)  
    A = np.power(np.multiply( np.divide( (1 - x1[0]), (1 + x1[1]) ), x1[2] ), sig )    
    A = A[i-1]     
    P = np.multiply(np.power(p_tr, ( np.divide(nu, pi) )), np.power(eta, sig ) )
    g = np.divide(np.multiply(phi[i-1], kappa ), pi )
    C = np.multiply( np.power(s[i-1], g ), np.power(gamma1, np.divide(1, pi) ) )
    H_tr = np.multiply(np.multiply(P, A), C )       
    return H_tr

\[\begin{equation*} \tilde{w}_{ir}= \psi \left( \frac{1-\tau_{ir}^w }{(1+ \tau_{ir}^h)^\eta} w_{ir} \right) (p_{tr}^{\alpha}H_{tr}^{1-\alpha})^{\varphi} s_i^{\phi_i} (1-s_i)^{\frac{1-\eta}{\beta}} \end{equation*}\] where \(\psi = \eta^\eta(1-\eta)^{1-\eta}\). We can interpret \(\tilde{w}_{ir}\) as the net reward of a person with mean ability in occupation \(i\) and region \(r\). So \(\tilde{w}_{ir}\) combines the wage per efficiency unit in the occupation, \(w_{ir}\), schooling, the teacher's human capital, and the frictions.

def w_tilf(x1):
    H_tr = H_trf(x1)
    p_tr = p_trf(x1)
    C = np.power((1 - s), np.divide((1-eta), beta)).reshape(i, 1)
    pp = np.power(s, phi)
    A = np.multiply(psi, np.multiply( np.divide( (1 - x1[0]), np.power( (1 + x1[1]), eta ) ), x1[2] ) )
    b = np.power(np.multiply(np.power(p_tr, alfa), np.power(H_tr, (1-alfa) ) ), varphi )
    B = np.multiply(b, pp.reshape(i, 1))
    w_til = np.multiply(np.multiply(A, B), C )
    return w_til

\[\begin{equation}\label{eq17} p_{ir} = \frac{\tilde{w}_{ir}^\theta} {\sum_{j=1}^N \tilde{w}_{jr}^\theta} \end{equation}\]

Where \(p_{ir}\) is the fraction of people who work in occupation \(i\) in region \(r\).


def p_irf(x1):    
    w_til = w_tilf(x1)
    w_til2 = np.power(w_til, theta) 
    w_r = w_til2.sum(axis = 0)    
    p_ir = np.divide(w_til2 , w_r ) 
    return np.array(p_ir), np.array(w_r)

Let \(W_{ir}\) be the gross average earnings in occupation \(i\) in region \(r\). Then:

\[\begin{equation}\label{eq27} W_{ir} = w_{ir}\mathbb{E}[h(e_{ir}, s_{i})\epsilon_i] = \frac{(1-s_i)^{-1/\beta}}{(1-\tau_{ir}^w)}\gamma \eta \left( \sum_{i=1}^N \tilde{w}_{ir}^\theta \right)^{\frac{1}{\theta(1-\eta)}} \end{equation}\]

Where \(\gamma= \Gamma(1-(\theta(1-\rho))^{-1}(1-\eta)^{-1})\) is related to the mean of the Fréchet distribution for abilities.
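As a quick, illustrative check (not part of the original pipeline): the Gamma argument is positive, and hence the Fréchet mean finite, only when \(\theta(1-\rho)(1-\eta) > 1\), which holds for the calibration above.

# illustrative sanity check: gamma1 is finite only if theta*(1-rho)*(1-eta) > 1
par()
print(theta * (1 - rho) * (1 - eta))   # about 2.09 > 1 with the calibration above
print(gamma1)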

def Wf(x1):
    p_ir, w_r = p_irf(x1)         
    z = np.multiply(np.multiply(gamma1, eta), np.power(w_r, np.divide(1, np.multiply(theta, (1 - eta)) ) ) )
    A = np.divide( np.power( (1 - s), (-1/beta) ), ( 1 - x1[0] )  )                
    W = np.multiply(A, z)
    return W, p_ir

Now I need to import the PNAD data.

def simul():
    global p_t, W_t
    p_t = pd.read_csv('pt.csv', sep=';')
    p_t = p_t.iloc[0:7]
    p_t.set_index('ocup', inplace=True)
    p_t = np.array(p_t)
    
    W_t = pd.read_csv('wt.csv', sep=';')
    W_t.set_index('ocup', inplace=True)
    W_t = np.array(W_t)
    
    return p_t, W_t

simul()
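Both CSV files are assumed to have an 'ocup' index column and one column per state, so that p_t and W_t come out as 7 × 27 arrays matching the model objects.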

So, the problem consists of minimizing the equation below using L-BFGS-B. Other algorithms could be used, for example a genetic algorithm (see the sketch after the L-BFGS-B call below).

\[\begin{equation}\label{eq28} Dist = \sum_{i=1, r=1}^{N, R} \left( \frac{W_{ir}^M - W_{ir}^T}{W_{ir}^T} \right)^2 + \sum_{i=1, r=1}^{N, R} \left( \frac{p_{ir}^M - p_{ir}^T}{p_{ir}^T} \right)^2 \end{equation}\]

# def obj2(x1):
#     tt = x1.reshape(3,5,r) 
#     ll = np.append(tt[0], tt[1,0:2,:] ).reshape(i,r)
#     jj = np.delete(tt[1], [0, 1], 0)
#     jj = np.append(jj, tt[2, 0:4, :]).reshape(i, r)
#     mm = np.delete(tt[2], [0, 1, 2, 3], 0)
#     x1 = np.array([ll, jj, mm])    
#     x1[0][0, :] = x1[0] [0, 0]    
#     x1[1][0, :] = 0
#     x1[2][0, r-1] = 1
#     W, p_ir = Wf(x1)
#     D =  (np.power(np.divide( (W-W_t), W_t ), 2) + np.power(np.divide( (p_ir-p_t), p_t ), 2) ).sum()
#     D = np.log(D)
#     return D


def obj2(x1):

    x1 = x1.reshape((3, i, r))
    x1[0, 0, :] = x1[0, 0, 0]    # tau_w of occupation 0 equal across states
    x1[1, 0, :] = 0              # tau_h of occupation 0 normalized to zero
    x1[2, :, 0] = 1              # wages of the first state normalized to one
    x1[2, 0:7, :] = x1[2, 0, :]  # wages equalized across occupations within a state
    W, p_ir = Wf(x1)

    D = (np.power(np.divide( (W-W_t), W_t ), 2) + np.power(np.divide( (p_ir-p_t), p_t ), 2) ).sum()
    # minimize log(D): a monotone transform of Dist, so the minimizer is unchanged
    D = np.log(D)

    return D

#%time obj2(np.vstack(taus2()).flatten())
%time obj2(taus2())
Wall time: 996 µs

10.46154254168181

Constraints

The bounds below restrict each \(\tau^w_{ir}\) to \((-0.99, 0.999)\), each \(\tau^h_{ir}\) to \((-0.99, 40)\), and each wage \(w_{ir}\) to \((0.001, 30)\).

# optimization

Bd = ((-0.99, 0.999), )*(i*r) + ((-0.99, 40), )*(i*r) + ((0.001, 30), )*(i*r)  # one bound per element of the flattened (3, i, r) parameter vector
Bd = np.array(Bd, dtype=object)

def hessp(x, l):   # unused: L-BFGS-B does not take a Hessian-vector product
    return np.zeros((3, i, r))

# L-BFGS-B

cc = 0
def callback(x):
    global cc
    cc += 1
    fobj = obj2(x)
    print(f'\033[1;33mObjective: {np.around(fobj, 5)}, iter: {cc}\033[0m')

    
%time sol = minimize(obj2, taus2().flatten(), method='L-BFGS-B', bounds=Bd, callback=callback, tol=1e-15, options={'maxiter': int(1e4), 'maxfun': int(1e6)})
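As mentioned earlier, other algorithms could be used instead of L-BFGS-B. A minimal sketch of a genetic-algorithm-style alternative with scipy.optimize.differential_evolution (the settings below are illustrative assumptions, not calibrated choices) would be:

from scipy.optimize import differential_evolution

# sketch only: population-based global search over the same objective and bounds
# (with 567 parameters this is slow; maxiter, popsize and seed are illustrative)
sol_de = differential_evolution(obj2, bounds=list(Bd), maxiter=200, popsize=15, seed=0)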
# the optimization yields the z1 array, loaded below from z1.xlsx

z1 = pd.read_excel('z1.xlsx')
z1 = np.array(z1).reshape(3, i, r)


obj2(z1)
-0.0022845109657596892
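Since obj2 returns \(\log(Dist)\), this value corresponds to a raw distance of roughly \(e^{-0.00228} \approx 0.998\) at the calibrated z1.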

Human Capital

\[\begin{equation}\label{eq23} H_{ir} = p_{ir}\tilde{h}_{ir}\left( \frac{1-\tau_{ir}^w}{1+\tau_{ir}^h} w_{ir} \right)^{\frac{\eta}{1- \eta}} \mathbb{E} \left[\epsilon_i^{\frac{1}{1-\eta}} \bigg|\text{person choices $i$} \right] \end{equation}\]

and

\[\begin{equation}\label{eq26} \mathbb{E} \left[\epsilon_i^{\frac{1}{1-\eta}} \bigg|\text{person choices $i$} \right] = \left(\frac{1}{p_{ir}} \right)^ {\frac{1}{\theta(1-\eta)}} \Gamma \left( 1 - \frac{1}{\theta(1-\rho)} \frac{1}{1-\eta} \right) \end{equation}\]

As before, \(\gamma= \Gamma(1-(\theta(1-\rho))^{-1}(1-\eta)^{-1})\) is related to the mean of the Fréchet distribution for abilities, and \(\tilde{h}_{ir} = [(p_{tr}^{\alpha}H_{tr}^{1-\alpha})^{\varphi} s_i^{\phi_i} \eta^\eta]^\kappa\).
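In the code below, the \(p_{ir}\) term and the conditional expectation are combined: since \(\frac{1}{\theta(1-\eta)} = \frac{\kappa}{\theta}\), H_irf raises \(p_{ir}\) to the power \(1 - \kappa/\theta\) and multiplies by \(\gamma\).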

def h_tilf(x1):
    H_tr = H_trf(x1) 
    p_tr = p_trf(x1) 
    A = np.power(np.multiply(np.power(p_tr, alfa), np.power(H_tr, (1-alfa) ) ), varphi )    
    B = np.multiply(np.power(s, phi), np.power(eta, eta) )
    h_til = np.power(np.multiply(A, B), kappa )
    return h_til

def H_irf(x1):
    p_ir = p_irf(x1)[0]
    h_til = h_tilf(x1)
    A = np.multiply(h_til, np.power(p_ir, (1-kappa/theta) ) )
    B = np.power(np.multiply( np.divide( (1 - x1[0]), (1 + x1[1]) ), x1[2] ), np.multiply(eta, kappa ) )       
    H_ir = np.multiply(np.multiply(A, B), gamma1 )
    return H_ir
    
    

The representative firm has the following production function: \[\begin{equation}\label{eq4} Y = \sum_{r=1}^R\sum_{i=1}^N A_r H_{ir} \end{equation}\]



def Y_f(x1):
    H_ir = H_irf(x1)
    # x1[2, 0, :] is the (occupation-invariant, normalized) wage level of each state,
    # which plays the role of the regional TFP A_r in the production function
    Y = np.multiply(x1[2, 0, :], H_ir)
    return Y


    

# Model

W, p_ir = Wf(z1.reshape(3, i, r))
W = pd.DataFrame(W)
p_ir = pd.DataFrame(p_ir)

 
# PNAD data

p_t, W_t = simul()   
W_t =  pd.DataFrame(W_t)
p_t =  pd.DataFrame(p_t)


# get colnames

n = pd.read_csv('pt.csv', sep=';')
n = n.iloc[0:7]
n.set_index('ocup', inplace=True)
names = n.columns.str.strip("'")
names = np.tile(np.array(names), 7)   # one state label per point of the flattened (7, 27) arrays



p_t = p_t.values.flatten() 
p_ir = p_ir.values.flatten()

W = W.values.flatten()
W_t = W_t.values.flatten()
# plots


### W and W_ir



ocn1 = ['oc1','oc2','oc3','oc4','oc5','oc6','oc7']
ocn1 = np.array(ocn1, dtype=object).repeat(27)

dd = pd.DataFrame(dict(x=W, y=W_t, label=ocn1))
groups = dd.groupby('label')

fig, ax = plt.subplots(figsize=(10,8))
ax.margins(0.05) # Optional, just adds 5% padding to the autoscaling
for ocn1, group in groups:
    ax.plot(group.x, group.y, marker='o', linestyle='', ms=12, label=ocn1)
ax.xaxis.set_tick_params(labelsize=20)
ax.yaxis.set_tick_params(labelsize=20)
for ii, txt in enumerate(names):
    ax.annotate(txt, (W[ii], W_t[ii]), size=20) 
ax.set_xlabel('Wage - Model', fontsize=20)
ax.set_ylabel('Wage - PNAD Data', fontsize=20)
ax.plot([1, 4.1], [1, 4.1], 'k-', lw=2, label='45° line')
ax.grid(True)
plt.tight_layout()    
ax.legend( prop={'size': 18})





## p_ir and p_t


ocn = ['oc1','oc2','oc3','oc4','oc5','oc6','oc7']
ocn = np.array(ocn, dtype=object).repeat(27)


ff = pd.DataFrame(dict(x=p_ir, y=p_t, label=ocn))
groups2 = ff.groupby('label')

fig, ax = plt.subplots(figsize=(10,8))
ax.margins(0.05) 
for ocn, group in groups2:
    ax.plot(group.x, group.y, marker='o', linestyle='', ms=12, label=ocn)
ax.xaxis.set_tick_params(labelsize=20)
ax.yaxis.set_tick_params(labelsize=20)
for ii, txt in enumerate(names):
    ax.annotate(txt, (p_ir[ii], p_t[ii]), size=20) 
ax.set_xlabel('Proportion of workers - Model', fontsize=20)
ax.set_ylabel('Proportion of workers - PNAD Data', fontsize=20)
ax.plot([0, 0.5], [0, 0.5], 'k-', lw=2, label='45° line')
ax.grid(True)
plt.tight_layout()    
ax.legend( prop={'size': 18})


   



## TFP and GDP plots - fig 3

Y = Y_f(z1).sum(axis=0)
tpf = z1[2, 0, :]
names2 = n.columns.str.strip("'")

plt.subplots(figsize=(10,8))
plt.scatter(tpf, Y, s=10)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
(m, b) = np.polyfit(tpf, Y, 1)
yp = np.polyval([m, b], tpf)
plt.plot(tpf, yp, label='Regression Line')
for tt, txt in enumerate(names2):
    plt.annotate(txt, (tpf[tt], Y[tt]), size=20) 
plt.grid(True)
plt.legend(loc="lower right", prop={'size': 20})
plt.xlabel(" TFP - model", fontsize=20)
plt.ylabel("GDP per worker - model", fontsize=20)
plt.tight_layout()    
 

 


# GDP per worker and teachers' human capital - fig 5

H_tr = H_trf(z1)
Y = Y_f(z1).sum(axis=0)
names2 = n.columns.str.strip("'")

plt.subplots(figsize=(10,8))
plt.scatter(H_tr, Y, s=10)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
(m, b) = np.polyfit(H_tr, Y, 1)
yp = np.polyval([m, b], H_tr)
plt.plot(H_tr, yp, label='Regression Line')
for tt, txt in enumerate(names2):
    plt.annotate(txt, (H_tr[tt], Y[tt]), size=20) 
plt.grid(True)
plt.legend(loc="lower right", prop={'size': 20})
plt.xlabel(" Human capital of teachers - model", fontsize=20)
plt.ylabel("GDP per worker - model", fontsize=20)
plt.tight_layout()    




## p_ir and teachers' wage - fig 4


p_tr = p_trf(z1)

# teachers' wage (last occupation) relative to the sum of wages across all occupations, by state
twg = np.array(W).reshape(i, r)[(i-1), :]/np.array(W).reshape(i, r).sum(axis=0)
 


names2 = n.columns.str.strip("'")

plt.subplots(figsize=(10,8))
plt.scatter(twg, p_tr, s=10)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
(m, b) = np.polyfit(twg, p_tr, 1)
yp = np.polyval([m, b], twg)
plt.plot(twg, yp, label='Regression Line')
for tt, txt in enumerate(names2):
    plt.annotate(txt, (twg[tt], p_tr[tt]), size=20) 
plt.grid(True)
plt.legend(loc="upper left", prop={'size': 20})
plt.xlabel(" Teachers wage relative to other occupations - model", fontsize=20)
plt.ylabel("Proportion of teachers - model", fontsize=20)
plt.tight_layout()    

  


 

Effects of an increase in distortions on GDP

In the next exercise, I increase the distortions and check whether this affects GDP.

## increase tau_h one unit at a time and the effect on GDP

dd = np.arange(0,21)

n = len(dd) 
Y_th = np.zeros((n))


for nn in dd: 
    g1 = z1[0] 
    g2 = z1[1] + nn
    g3 = z1[2]
    G = np.array([g1, g2, g3])
    Y_th[nn] = Y_f(G).sum() 
    

plt.subplots(figsize=(10,8))
plt.plot(dd, Y_th)
plt.xticks(np.arange(min(dd), max(dd)+2, 2.0))
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.grid()
plt.xlabel(r"Increases in $\tau^h_{ir}$", fontsize=20)
plt.ylabel("GDP - model", fontsize=20)
plt.tight_layout()    



## change tau_w one unit at a time and the effect on GDP



bb = np.arange(-20, 1)
n = len(bb) 
Y_tw = []



for nn in bb: 
    c1 = z1[0] + nn
    c2 = z1[1] 
    c3 = z1[2]
    C = np.array([c1, c2, c3])
    Y_tw.append(Y_f(C).sum())  
    
plt.subplots(figsize=(10,8))  
plt.plot(bb, Y_tw)
plt.xticks(np.arange(min(bb), max(bb)+2, 2.0))
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.grid()
plt.xlabel(r"Increases in $\tau^w_{ir}$ ", fontsize=20)
plt.ylabel("GDP - model", fontsize=20)
plt.tight_layout()    


Variation of GDP - changing the frictions to those of the states with the highest and lowest ATHC

Now I apply the frictions of RO and SP (the states with the highest and lowest ATHC, respectively) to all states to verify the effect on GDP. In the first case GDP increases, as expected, since RO has the lowest distortions; with SP's frictions, GDP decreases.

gdp2 = Y_f(z1).sum(axis=0)
names2


names2[gdp2.argsort()[-3:][::-1]] # get 3 largest gdps


names2[np.argsort(H_trf(z1))[-3:][::-1]]  # get the 3 largest ATHC


 
# apply the frictions of RO, since RO has the largest ATHC
 

jj = np.array(z1, copy=True)

# copy the frictions of column 0 (RO) to every state
for vv in range(27):
    jj[0, :, vv] = jj[0, :, 0]
    jj[1, :, vv] = jj[1, :, 0]



gpd_z1 = Y_f(z1).sum(axis=0)
gpd_c = Y_f(jj).sum(axis=0)


plt.subplots(figsize=(10,8))  
plt.scatter(gpd_z1, gpd_c)
plt.yticks(fontsize=20)
plt.xticks(fontsize=20)
for tt, txt in enumerate(names2):
    plt.annotate(txt, (gpd_z1[tt], gpd_c[tt]), size=20)                                                    
plt.grid(True)
plt.xlabel("GDP before change friction - model", fontsize=20)
plt.ylabel('GDP after placing state distortions \n with higher ATHC - model', fontsize=20)
plt.tight_layout()    
plt.plot([4, 7], [4, 7], 'k-', lw=2, label='45° line')
plt.legend(loc="upper left", prop={'size': 20})


print(f'GDP of all states before change frictions {Y_f(z1).sum()}. \n GDP of all states after change frictions {Y_f(jj).sum()} ')

  
GDP of all states before change frictions 142.9915682638852. 
 GDP of all states after change frictions 1183.511132074184 


################### exercise 2


jj2 = np.array(z1, copy=True)




# the smallest ATHC is SP

names2[np.argmin(H_trf(z1))]



# copy the frictions of column 19 (SP, per the argmin above) to every state
for vv in range(27):
    jj2[0, :, vv] = jj2[0, :, 19]
    jj2[1, :, vv] = jj2[1, :, 19]



gpd_z1 = Y_f(z1).sum(axis=0)
gpd_c2 = Y_f(jj2).sum(axis=0)

plt.subplots(figsize=(10,8))  
plt.scatter(gpd_z1, gpd_c2)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
for tt, txt in enumerate(names2):
    plt.annotate(txt, (gpd_z1[tt], gpd_c2[tt]), size=20)                                                    
plt.grid(True)
plt.xlabel("GDP before change friction - model", fontsize=20)
plt.ylabel('GDP after placing state distortions \n with smallest ATHC - model', fontsize=20)
plt.tight_layout()    
plt.plot([4.4, 7], [4.4, 7], 'k-', lw=2, label='45° line')
plt.legend(loc="upper left", prop={'size': 20})


print(f'GDP of all states before change frictions {Y_f(z1).sum()}. \n GDP of all states after change frictions {Y_f(jj2).sum()} ')


GDP of all states before change frictions 142.9915682638852. 
GDP of all states after change frictions 112.36302538517401 