# load libraries

import os
os.getcwd()
os.chdir("C:\\Users\\user\\Downloads\\Thesis_paper_1\\dados")

import numpy as np
import matplotlib.pyplot as plt
import time
import pandas as pd
import datetime 
import statsmodels.api as sm
import wbdata


df = pd.read_excel('prod_trab.xlsx', sheet_name='prod_hr')


# slice df

ch = df[['Anos', 'Agropecuária', 'Industria Total', 'Serviços Total', 'Total', 'hc']]


# calculate pct

pct = ch.iloc[:,1:].pct_change().iloc[1: , :]

pct3 = pct

# define periods

ind1 = ['1995-2003', '2003-2011', '2011-2016', '2016-2019']


# set year

pct['ano'] = df['Anos']



# create boolean

pct['my_code']= 0
bol1 = (pct['ano']>=1996) & (pct['ano']<=2003)
pct.loc[bol1, ['my_code']] = 1
bol2 = (pct['ano']>=2003) & (pct['ano']<=2011)
pct.loc[bol2, ['my_code']] = 2
bol3 = (pct['ano']>=2011) & (pct['ano']<=2016)
pct.loc[bol3, ['my_code']] = 3
bol4 = (pct['ano']>=2016) & (pct['ano']<=2019)
pct.loc[bol4, ['my_code']] = 4


# group by total

pct['my_code2'] = 0
bol5 = (pct['ano']>=1996) & (pct['ano']<=2019)
pct.loc[bol5, ['my_code2']] = 1

pct2 = pct.groupby(['my_code2']).mean().iloc[:, 0:5]*100

pct2 = pct2.round(4)


# group by mean

pct = pct.groupby(['my_code']).mean().iloc[:, 0:5]*100
pct.index = ind1

pct.rename(columns= {'Industria Total': 'Indústria', 
                     'Serviços Total': 'Serviços', 
                     'hc': 'Anos de escolaridade', 
                    'Total': 'Produtividade agregada'}, inplace=True)


pct = pct.round(4)

#print(pct.to_latex(index=True))
#print(pct2.to_latex(index=True))
pct


mod = sm.OLS(pct3.loc[:,['Total']], pct3.loc[:,['hc']] )
res = mod.fit()

print(res.summary())

                                 OLS Regression Results                                
=======================================================================================
Dep. Variable:                  Total   R-squared (uncentered):                   0.101
Model:                            OLS   Adj. R-squared (uncentered):              0.062
Method:                 Least Squares   F-statistic:                              2.581
Date:                Mon, 04 Jan 2021   Prob (F-statistic):                       0.122
Time:                        10:43:52   Log-Likelihood:                          59.799
No. Observations:                  24   AIC:                                     -117.6
Df Residuals:                      23   BIC:                                     -116.4
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
hc             0.2482      0.154      1.607      0.122      -0.071       0.568
==============================================================================
Omnibus:                        2.117   Durbin-Watson:                   1.483
Prob(Omnibus):                  0.347   Jarque-Bera (JB):                1.053
Skew:                           0.495   Prob(JB):                        0.591
Kurtosis:                       3.272   Cond. No.                         1.00
==============================================================================

Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.


# slice dataframe

df2 = df[['Anos', 'Agropecuária', 'Industria Total', 'Serviços Total', 'Total', 'hc']]


fig,ax = plt.subplots(figsize=(10,8))
ax.plot(df2.Anos, df2.Agropecuária, label ='Agropecuária', color='gray', marker='o', markersize=10)
ax.plot(df2.Anos, df2[['Industria Total']], label='Indústria', color='crimson', marker='d', markersize=10)
ax.plot(df2.Anos, df2[['Serviços Total']], label='Serviços', color='orange', marker='s', markersize=10)
ax.plot(df2.Anos, df2['Total'],  label='Total', color='blue', marker='v', markersize=10)
ax.set_ylabel("Produtividade do trabalho", fontsize=18)
ax.set_xlabel("Anos", fontsize=18)
plt.legend(loc="center left", prop={'size': 16})
plt.xticks(np.arange(min(df2.Anos), max(df2.Anos)+1, 2.0), fontsize=20, rotation=45)
plt.yticks(fontsize=20)
plt.grid(True)
ax2=ax.twinx()
ax2.plot(df2.Anos, df2.hc, label = 'Anos de escolaridade', color='limegreen', marker='X', markersize=10)
ax2.set_ylabel("Anos de escolaridade", fontsize=18)
plt.yticks(fontsize=20)
plt.legend(loc="lower right", prop={'size': 16})
plt.tight_layout()    
#plt.savefig("C:/Users/user/Downloads/Thesis_paper_1/paper_tex/fig1.eps", format='eps', dpi=2000)


df3 = pd.read_excel('pwt91.xlsx', sheet_name='Data')


df3.head()


df4 = df3[['countrycode', 'year', 'rtfpna']]


x = np.array( df4[df4.year==1980]['rtfpna'] )
y = np.array( df4[df4.year==2017]['rtfpna'] )
name = np.array( df4[df4.year==2017]['countrycode'] )


hx = x[np.argmax(1*(name=='BRA') )]+0.08
hy = y[np.argmax(1*(name=='BRA') )]


fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(x, y, s=10)
plt.scatter(hx, hy, s=1100, color='yellow')
plt.plot([0.5, 1.5], [0.5, 1.5], 'k-', lw=2, label='45° line')
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
for tt, txt in enumerate(name):
    ax.annotate(txt, (x[tt], y[tt]), size=20)  
plt.grid(True)
plt.legend(loc="lower right", prop={'size': 20})
plt.xlabel("TFP - 1980", fontsize=20)
plt.ylabel("TFP - 2017", fontsize=20)
plt.tight_layout()    
#plt.savefig("C:/Users/user/Downloads/Thesis_paper_1/dados/fig2.eps", format='eps', dpi=2000)


df5 =  df3[df3.countrycode=='BRA'][['rtfpna', 'year']]


x2 = df5['rtfpna']
y2 = df5['year']


fig,ax = plt.subplots(figsize=(10,8))
ax.plot(y2, x2)
plt.grid(True)
plt.xticks(np.arange(min(y2), max(y2)+5, 5), fontsize=20, rotation=45 )
plt.yticks(fontsize=20)
plt.xlabel("Year", fontsize=20)
plt.ylabel("TFP", fontsize=20)
plt.tight_layout()    
#plt.savefig("C:/Users/user/Downloads/Thesis_paper_1/dados/fig3.eps", format='eps', dpi=2000)


# https://wbdata.readthedocs.io/en/stable/

data_date = datetime.datetime(1995, 1, 1), datetime.datetime(2021, 1, 1) 
country = ['BRA', 'HIC', 'LMC', 'UMC', 'LIC']

ind = {'SE.XPD.TOTL.GD.ZS': 'spend'}

df6 = wbdata.get_dataframe(ind, country = country,  data_date=data_date)


df6 = df6.reset_index()


df6 = df6.pivot(index="date", columns="country", values="spend")


df6.reset_index(inplace=True)


df6.rename(columns={"date": "Anos", "Brazil": "Brasil",
                   "High income": "Renda alta", "Low income": "Renda Baixa",
                   "Lower middle income": "Renda média baixa",
                   "Upper middle income": "Renda média alta"}, inplace=True)


#pd.to_datetime(df6.Anos, format = "%Y")

#df['col'] = pd.to_datetime(df['col'])

df6.Anos = df6.Anos.astype(str).astype(int)


fig,ax = plt.subplots(figsize=(10,8))
ax.plot(df6.Anos, df6[["Renda média alta"]], label= "Renda média alta", color='orange', marker='v', markersize=10)
ax.plot(df6.Anos, df6[["Renda média baixa"]], label= "Renda média baixa", color='blue', marker='X', markersize=10)
ax.plot(df6.Anos, df6[["Renda Baixa"]], label="Renda Baixa", color='crimson', marker='s', markersize=10)
ax.plot(df6.Anos, df6[["Renda alta"]], label='Renda alta', color='gray', marker='d', markersize=10)
ax.plot(df6.Anos, df6[['Brasil']], label='Brasil', color='green', marker='o', markersize=10)
ax.set_ylabel("Gastos do governo em educação (% do PIB)", fontsize=18)
ax.set_xlabel("Anos", fontsize=18)
plt.legend(loc="upper left", prop={'size': 16})
plt.xticks(np.arange(min(df6.Anos), max(df6.Anos)+1, 2.0), fontsize=20, rotation=45)
plt.yticks(fontsize=20)
plt.grid(True)
plt.tight_layout()    
#plt.savefig("C:/Users/user/Downloads/Thesis_paper_1/paper_tex/fig4.eps", format='eps', dpi=2000)


fig,ax = plt.subplots(figsize=(10,8))
ax.plot(df6.Anos, df2.Agropecuária/df6.Brasil, label ='Agropecuária', color='gray', marker='o', markersize=10)
ax.plot(df6.Anos, df2['Industria Total']/df6.Brasil, label='Indústria', color='crimson', marker='d', markersize=10)
ax.plot(df6.Anos, df2['Serviços Total']/df6.Brasil, label='Serviços', color='orange', marker='s', markersize=10)
ax.plot(df6.Anos, df['Total']/df6.Brasil,  label='Total', color='blue', marker='v', markersize=10)
ax.set_ylabel("Produtividade relativa aos gastos do governo \n em educação", fontsize=18)
ax.set_xlabel("Anos", fontsize=18)
plt.legend(loc="upper right", prop={'size': 16})
plt.xticks(np.arange(min(df6.Anos), max(df6.Anos)-1, 2.0), fontsize=20, rotation=45)
plt.yticks(fontsize=20)
plt.grid(True)
plt.tight_layout()    
#plt.savefig("C:/Users/user/Downloads/Thesis_paper_1/dados/fig5.eps", format='eps', dpi=2000)

	Agropecuária	Indústria	Serviços	Produtividade agregada	Anos de escolaridade
1995-2003	5.9721	-2.2409	-0.4629	0.1612	3.8692
2003-2011	6.6982	0.9278	1.5636	2.2208	1.7670
2011-2016	8.9577	-0.3645	0.0181	0.7520	1.9576
2016-2019	5.3128	1.4422	-1.0949	-0.0805	-0.0790

	countrycode	country	currency_unit	year	rgdpe	rgdpo	pop	emp	avh	hc	...	csh_x	csh_m	csh_r	pl_c	pl_i	pl_g	pl_x	pl_m	pl_n	pl_k
0	ABW	Aruba	Aruban Guilder	1950	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	ABW	Aruba	Aruban Guilder	1951	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	ABW	Aruba	Aruban Guilder	1952	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	ABW	Aruba	Aruban Guilder	1953	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	ABW	Aruba	Aruban Guilder	1954	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

Load IBRE dataset¶

Average Produtivity Growth¶

Estimate regression using statsmodel¶

Stagnant productivity is a global trend¶

Total Productivity Factors in Brazil (PWT data)¶

Spend in education (World Bank Data)¶