import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the material-flow-analysis output table from the working directory.
mfa_csv_path = "OUTPUT_MFA.csv"
df_mfa = pd.read_csv(mfa_csv_path)

# Bare expression: in a notebook this displays the first five rows.
df_mfa.head()

import pandas as pd

# Rows to drop: the recycling process name contains "pyro" (case-insensitive,
# missing values count as "no match") AND the recycled weight is exactly zero.
is_pyro_process = df_mfa['recycling.process'].str.contains(
    'pyro', case=False, na=False
)
has_zero_recycled = df_mfa['kg.recycled.weight'] == 0
mask = is_pyro_process & has_zero_recycled

# Keep everything that is NOT flagged by the mask.
df_filtered = df_mfa.loc[~mask]

# Narrow the frame to the two columns used by the rest of the analysis.
analysis_columns = ['kg.retired', 'kg.recycled.weight']
df_filtered_mfa = df_filtered[analysis_columns]

print(df_filtered_mfa.head())

     kg.retired  kg.recycled.weight
0  7.192595e+06        6.473336e+06
1  7.192595e+06        6.473336e+06
2  7.192595e+06        6.473336e+06
3  7.192595e+06        6.473336e+06
6  3.697164e+05        3.327448e+05

# Show a longer preview (first 20 rows) of the filtered two-column frame.
first_twenty_rows = df_filtered_mfa.head(20)
print(first_twenty_rows)

      kg.retired  kg.recycled.weight
0   7.192595e+06        6.473336e+06
1   7.192595e+06        6.473336e+06
2   7.192595e+06        6.473336e+06
3   7.192595e+06        6.473336e+06
6   3.697164e+05        3.327448e+05
7   3.697164e+05        3.327448e+05
8   3.697164e+05        3.623221e+05
9   3.697164e+05        3.623221e+05
10  3.697164e+05        3.327448e+05
11  3.697164e+05        3.327448e+05
12  1.444868e+06        1.300381e+06
13  1.444868e+06        1.300381e+06
14  1.444868e+06        1.300381e+06
15  1.444868e+06        1.300381e+06
16  1.444868e+06        1.300381e+06
17  1.444868e+06        1.300381e+06
18  2.364594e+06        2.128135e+06
19  2.364594e+06        2.128135e+06
20  2.364594e+06        2.128135e+06
21  2.364594e+06        2.128135e+06

import matplotlib.pyplot as plt

# Histogram of the recycled weight column.
# The title and axis labels are set through the Axes object. The previous
# code compared the pyplot functions with strings (`plt.title == '...'`),
# which only printed False and never labelled the figure.
fig, ax = plt.subplots()
ax.hist(df_filtered_mfa['kg.recycled.weight'], bins=15, color='red', edgecolor='pink')
ax.set_title('Frequency of Kg. Recycled weight')
ax.set_xlabel('kg.recycled.weight')
ax.set_ylabel('frequency')
plt.show()

False
False
False

import seaborn as sns

# Histogram of the retired weight column.
# sns.histplot returns the matplotlib Axes it drew on; labels are set on that
# object. Assigning to plt.title / plt.xlabel / plt.ylabel (as before) only
# replaced those pyplot functions with strings and labelled nothing.
ax = sns.histplot(data=df_filtered_mfa, x="kg.retired", bins=15, color='red', edgecolor='pink')
ax.set_title('Frequency of kg batteries retired')
ax.set_xlabel('kg retired')
ax.set_ylabel('frequency')
# plt.show must be *called*; the bare name merely evaluates to the function.
plt.show()

<function matplotlib.pyplot.show(close=None, block=None)>

import seaborn as sns
import matplotlib.pyplot as plt

# Scatter of kg.retired against kg.recycled.weight with a fitted regression
# line (blue) over semi-transparent points.
# Labels go through the returned Axes: assigning strings to plt.title /
# plt.xlabel / plt.ylabel overwrites the pyplot functions instead of
# calling them, so nothing was being labelled.
ax = sns.regplot(data=df_filtered_mfa, x='kg.retired', y='kg.recycled.weight',
                 line_kws={"color": "blue"}, scatter_kws={"alpha": 0.4})
ax.set_title("Scatterplot of Kg.retired vs Kg.Recycled Weight")
ax.set_xlabel("kg retired")
ax.set_ylabel("kg.recycled.weight")
plt.show()

import seaborn as sns
import matplotlib.pyplot as plt

# Row-wise difference: recycled weight minus retired weight.
# NOTE(review): this is the raw difference between the two columns, not the
# residual of a fitted regression (observed minus predicted) — confirm which
# one the analysis intends to call "residuals".
residuals = df_filtered_mfa['kg.recycled.weight'] - df_filtered_mfa['kg.retired']

# Histogram with a KDE overlay. Labels are set on the returned Axes; the
# previous `plt.title = "..."` assignments replaced the pyplot functions
# with strings and never labelled the figure.
ax = sns.histplot(residuals, bins=20, kde=True, color='orange')
ax.set_title("Histogram of Residuals of Kg Retired vs Kg Recycled Weight")
ax.set_xlabel("Residuals")
ax.set_ylabel("Frequency")
plt.show()

import scipy.stats as stats
import matplotlib.pyplot as plt

# Row-wise difference: recycled weight minus retired weight.
residuals = df_filtered_mfa['kg.recycled.weight'] - df_filtered_mfa['kg.retired']

# Probability plot against a normal distribution, drawn on an explicit Axes.
# Using the Axes object keeps this working even if plt.title was overwritten
# by a string elsewhere in the session, and lets us set the title with a real
# call; the previous `plt.title = "..."` was only an assignment.
fig, ax = plt.subplots()
stats.probplot(residuals, dist="norm", plot=ax)
ax.set_title("QQ Plot of Residuals")
plt.show()

from scipy import stats

# Simple linear regression of recycled weight (y) on retired weight (x).
x = df_filtered_mfa['kg.retired']
y = df_filtered_mfa['kg.recycled.weight']

fit = stats.linregress(x, y)
# The result unpacks into the five classic fields.
slope, intercept, r_value, p_value, std_err = fit

# Report the fit; r_value is the correlation coefficient, so it is squared.
r_squared = r_value**2
print(f"Slope: {slope:.2f}")
print(f"Intercept: {intercept:.2f}")
print(f"R-squared: {r_squared:.2f}")
print(f"P-value: {p_value:.4f}")

Slope: 0.90
Intercept: -105169.79
R-squared: 1.00
P-value: 0.0000

import statsmodels.api as sm

# Ordinary least squares of recycled weight on retired weight.
Z = df_filtered_mfa['kg.retired']
y = df_filtered_mfa['kg.recycled.weight']

# Add the intercept column to the predictor. The previous code passed an
# undefined name `X` here, which raises NameError in a fresh session (or
# silently picks up a stale variable from an earlier run).
Z = sm.add_constant(Z)

model = sm.OLS(y, Z).fit()
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:     kg.recycled.weight   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 5.623e+08
Date:                Thu, 26 Jun 2025   Prob (F-statistic):               0.00
Time:                        23:07:21   Log-Likelihood:            -2.5278e+06
No. Observations:              151776   AIC:                         5.056e+06
Df Residuals:                  151774   BIC:                         5.056e+06
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -1.052e+05   1.16e+04     -9.094      0.000   -1.28e+05   -8.25e+04
kg.retired     0.9007    3.8e-05   2.37e+04      0.000       0.901       0.901
==============================================================================
Omnibus:                   107137.686   Durbin-Watson:                   1.256
Prob(Omnibus):                  0.000   Jarque-Bera (JB):         13828849.858
Skew:                          -2.543   Prob(JB):                         0.00
Kurtosis:                      49.485   Cond. No.                     3.31e+08
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 3.31e+08. This might indicate that there are
strong multicollinearity or other numerical problems.

	Year	scenario.percent.repurposed	scenario.reuse.lifespan	material	recycling.process	Sales.Scenario	Cathode.Scenario	kWh.retired	kg.retired	kg.recycled.weight	kWh	kg.demand	circularity (%)	collection_rate
0	2020	high	reuse.high	Aluminum	direct	SDS	LFP	2417264.362	7192595.151	6473335.636	20735767.32	68338836.46	0.094724	0.6
1	2020	high	reuse.high	Aluminum	direct	STEPS	LFP	2417264.362	7192595.151	6473335.636	20735767.32	68338836.46	0.094724	0.6
2	2020	high	reuse.high	Aluminum	hydro	SDS	LFP	2417264.362	7192595.151	6473335.636	20735767.32	68338836.46	0.094724	0.6
3	2020	high	reuse.high	Aluminum	hydro	STEPS	LFP	2417264.362	7192595.151	6473335.636	20735767.32	68338836.46	0.094724	0.6
4	2020	high	reuse.high	Aluminum	pyro	SDS	LFP	2417264.362	7192595.151	0.000	20735767.32	68338836.46	0.000000	0.6

Introduction and Setup

Introducing our Data/Importing Libraries

Graph Time

Scatterplot

Normality/Data Analysis

Additional Notes about Data Analysis

Conclusion

Reference of Data Set Used: