# load libraries
import os
os.getcwd()
os.chdir("C:\\Users\\user\\Downloads\\Thesis_paper_1\\dados")
import numpy as np
import matplotlib.pyplot as plt
import time
import pandas as pd
import datetime
import statsmodels.api as sm
import wbdata
df = pd.read_excel('prod_trab.xlsx', sheet_name='prod_hr')
The table shows that productivity of industry and services are stagnant.Despite years of schooling increasing.
# slice df
ch = df[['Anos', 'Agropecuária', 'Industria Total', 'Serviços Total', 'Total', 'hc']]
# calculate pct
pct = ch.iloc[:,1:].pct_change().iloc[1: , :]
pct3 = pct
# define periods
ind1 = ['1995-2003', '2003-2011', '2011-2016', '2016-2019']
# set year
pct['ano'] = df['Anos']
# create boolean
pct['my_code']= 0
bol1 = (pct['ano']>=1996) & (pct['ano']<=2003)
pct.loc[bol1, ['my_code']] = 1
bol2 = (pct['ano']>=2003) & (pct['ano']<=2011)
pct.loc[bol2, ['my_code']] = 2
bol3 = (pct['ano']>=2011) & (pct['ano']<=2016)
pct.loc[bol3, ['my_code']] = 3
bol4 = (pct['ano']>=2016) & (pct['ano']<=2019)
pct.loc[bol4, ['my_code']] = 4
# group by total
pct['my_code2'] = 0
bol5 = (pct['ano']>=1996) & (pct['ano']<=2019)
pct.loc[bol5, ['my_code2']] = 1
pct2 = pct.groupby(['my_code2']).mean().iloc[:, 0:5]*100
pct2 = pct2.round(4)
# group by mean
pct = pct.groupby(['my_code']).mean().iloc[:, 0:5]*100
pct.index = ind1
pct.rename(columns= {'Industria Total': 'Indústria',
'Serviços Total': 'Serviços',
'hc': 'Anos de escolaridade',
'Total': 'Produtividade agregada'}, inplace=True)
pct = pct.round(4)
#print(pct.to_latex(index=True))
#print(pct2.to_latex(index=True))
pct
| Agropecuária | Indústria | Serviços | Produtividade agregada | Anos de escolaridade | |
|---|---|---|---|---|---|
| 1995-2003 | 5.9721 | -2.2409 | -0.4629 | 0.1612 | 3.8692 |
| 2003-2011 | 6.6982 | 0.9278 | 1.5636 | 2.2208 | 1.7670 |
| 2011-2016 | 8.9577 | -0.3645 | 0.0181 | 0.7520 | 1.9576 |
| 2016-2019 | 5.3128 | 1.4422 | -1.0949 | -0.0805 | -0.0790 |
The regression show that there is no relation between years of schooling and productivity in Brazil.
mod = sm.OLS(pct3.loc[:,['Total']], pct3.loc[:,['hc']] )
res = mod.fit()
print(res.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: Total R-squared (uncentered): 0.101
Model: OLS Adj. R-squared (uncentered): 0.062
Method: Least Squares F-statistic: 2.581
Date: Mon, 04 Jan 2021 Prob (F-statistic): 0.122
Time: 10:43:52 Log-Likelihood: 59.799
No. Observations: 24 AIC: -117.6
Df Residuals: 23 BIC: -116.4
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
hc 0.2482 0.154 1.607 0.122 -0.071 0.568
==============================================================================
Omnibus: 2.117 Durbin-Watson: 1.483
Prob(Omnibus): 0.347 Jarque-Bera (JB): 1.053
Skew: 0.495 Prob(JB): 0.591
Kurtosis: 3.272 Cond. No. 1.00
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# slice dataframe
df2 = df[['Anos', 'Agropecuária', 'Industria Total', 'Serviços Total', 'Total', 'hc']]
You can conclude this in figure below.
fig,ax = plt.subplots(figsize=(10,8))
ax.plot(df2.Anos, df2.Agropecuária, label ='Agropecuária', color='gray', marker='o', markersize=10)
ax.plot(df2.Anos, df2[['Industria Total']], label='Indústria', color='crimson', marker='d', markersize=10)
ax.plot(df2.Anos, df2[['Serviços Total']], label='Serviços', color='orange', marker='s', markersize=10)
ax.plot(df2.Anos, df2['Total'], label='Total', color='blue', marker='v', markersize=10)
ax.set_ylabel("Produtividade do trabalho", fontsize=18)
ax.set_xlabel("Anos", fontsize=18)
plt.legend(loc="center left", prop={'size': 16})
plt.xticks(np.arange(min(df2.Anos), max(df2.Anos)+1, 2.0), fontsize=20, rotation=45)
plt.yticks(fontsize=20)
plt.grid(True)
ax2=ax.twinx()
ax2.plot(df2.Anos, df2.hc, label = 'Anos de escolaridade', color='limegreen', marker='X', markersize=10)
ax2.set_ylabel("Anos de escolaridade", fontsize=18)
plt.yticks(fontsize=20)
plt.legend(loc="lower right", prop={'size': 16})
plt.tight_layout()
#plt.savefig("C:/Users/user/Downloads/Thesis_paper_1/paper_tex/fig1.eps", format='eps', dpi=2000)
Using PWT dataset we verify that TFP of various countries was decrease, when we compare 1998 and 2017. See figure below.
df3 = pd.read_excel('pwt91.xlsx', sheet_name='Data')
df3.head()
| countrycode | country | currency_unit | year | rgdpe | rgdpo | pop | emp | avh | hc | ... | csh_x | csh_m | csh_r | pl_c | pl_i | pl_g | pl_x | pl_m | pl_n | pl_k | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ABW | Aruba | Aruban Guilder | 1950 | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | ABW | Aruba | Aruban Guilder | 1951 | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | ABW | Aruba | Aruban Guilder | 1952 | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | ABW | Aruba | Aruban Guilder | 1953 | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | ABW | Aruba | Aruban Guilder | 1954 | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 52 columns
df4 = df3[['countrycode', 'year', 'rtfpna']]
x = np.array( df4[df4.year==1980]['rtfpna'] )
y = np.array( df4[df4.year==2017]['rtfpna'] )
name = np.array( df4[df4.year==2017]['countrycode'] )
hx = x[np.argmax(1*(name=='BRA') )]+0.08
hy = y[np.argmax(1*(name=='BRA') )]
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(x, y, s=10)
plt.scatter(hx, hy, s=1100, color='yellow')
plt.plot([0.5, 1.5], [0.5, 1.5], 'k-', lw=2, label='45° line')
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
for tt, txt in enumerate(name):
ax.annotate(txt, (x[tt], y[tt]), size=20)
plt.grid(True)
plt.legend(loc="lower right", prop={'size': 20})
plt.xlabel("TFP - 1980", fontsize=20)
plt.ylabel("TFP - 2017", fontsize=20)
plt.tight_layout()
#plt.savefig("C:/Users/user/Downloads/Thesis_paper_1/dados/fig2.eps", format='eps', dpi=2000)
The TPF in Brazil between 1955 and 1980. From 1980 it started to decrease.
df5 = df3[df3.countrycode=='BRA'][['rtfpna', 'year']]
x2 = df5['rtfpna']
y2 = df5['year']
fig,ax = plt.subplots(figsize=(10,8))
ax.plot(y2, x2)
plt.grid(True)
plt.xticks(np.arange(min(y2), max(y2)+5, 5), fontsize=20, rotation=45 )
plt.yticks(fontsize=20)
plt.xlabel("Year", fontsize=20)
plt.ylabel("TFP", fontsize=20)
plt.tight_layout()
#plt.savefig("C:/Users/user/Downloads/Thesis_paper_1/dados/fig3.eps", format='eps', dpi=2000)
The spend in education in Brazil has increase since 2004. And since 2006 it's bigger than high income countries. See figure below.
# https://wbdata.readthedocs.io/en/stable/
data_date = datetime.datetime(1995, 1, 1), datetime.datetime(2021, 1, 1)
country = ['BRA', 'HIC', 'LMC', 'UMC', 'LIC']
ind = {'SE.XPD.TOTL.GD.ZS': 'spend'}
df6 = wbdata.get_dataframe(ind, country = country, data_date=data_date)
df6 = df6.reset_index()
df6 = df6.pivot(index="date", columns="country", values="spend")
df6.reset_index(inplace=True)
df6.rename(columns={"date": "Anos", "Brazil": "Brasil",
"High income": "Renda alta", "Low income": "Renda Baixa",
"Lower middle income": "Renda média baixa",
"Upper middle income": "Renda média alta"}, inplace=True)
#pd.to_datetime(df6.Anos, format = "%Y")
#df['col'] = pd.to_datetime(df['col'])
df6.Anos = df6.Anos.astype(str).astype(int)
fig,ax = plt.subplots(figsize=(10,8))
ax.plot(df6.Anos, df6[["Renda média alta"]], label= "Renda média alta", color='orange', marker='v', markersize=10)
ax.plot(df6.Anos, df6[["Renda média baixa"]], label= "Renda média baixa", color='blue', marker='X', markersize=10)
ax.plot(df6.Anos, df6[["Renda Baixa"]], label="Renda Baixa", color='crimson', marker='s', markersize=10)
ax.plot(df6.Anos, df6[["Renda alta"]], label='Renda alta', color='gray', marker='d', markersize=10)
ax.plot(df6.Anos, df6[['Brasil']], label='Brasil', color='green', marker='o', markersize=10)
ax.set_ylabel("Gastos do governo em educação (% do PIB)", fontsize=18)
ax.set_xlabel("Anos", fontsize=18)
plt.legend(loc="upper left", prop={'size': 16})
plt.xticks(np.arange(min(df6.Anos), max(df6.Anos)+1, 2.0), fontsize=20, rotation=45)
plt.yticks(fontsize=20)
plt.grid(True)
plt.tight_layout()
#plt.savefig("C:/Users/user/Downloads/Thesis_paper_1/paper_tex/fig4.eps", format='eps', dpi=2000)
fig,ax = plt.subplots(figsize=(10,8))
ax.plot(df6.Anos, df2.Agropecuária/df6.Brasil, label ='Agropecuária', color='gray', marker='o', markersize=10)
ax.plot(df6.Anos, df2['Industria Total']/df6.Brasil, label='Indústria', color='crimson', marker='d', markersize=10)
ax.plot(df6.Anos, df2['Serviços Total']/df6.Brasil, label='Serviços', color='orange', marker='s', markersize=10)
ax.plot(df6.Anos, df['Total']/df6.Brasil, label='Total', color='blue', marker='v', markersize=10)
ax.set_ylabel("Produtividade relativa aos gastos do governo \n em educação", fontsize=18)
ax.set_xlabel("Anos", fontsize=18)
plt.legend(loc="upper right", prop={'size': 16})
plt.xticks(np.arange(min(df6.Anos), max(df6.Anos)-1, 2.0), fontsize=20, rotation=45)
plt.yticks(fontsize=20)
plt.grid(True)
plt.tight_layout()
#plt.savefig("C:/Users/user/Downloads/Thesis_paper_1/dados/fig5.eps", format='eps', dpi=2000)