email:rchang@unah.edu.hn / rchang@unitec.edu
You can check your Python environment with:
##Chunk R
# Print the Python configuration reticulate is bound to; the echoed output
# below lists the interpreter path, libpython, version and numpy location.
reticulate::py_config()
## python: C:/Users/rchang/AppData/Local/r-miniconda/envs/r-reticulate/python.exe
## libpython: C:/Users/rchang/AppData/Local/r-miniconda/envs/r-reticulate/python38.dll
## pythonhome: C:/Users/rchang/AppData/Local/r-miniconda/envs/r-reticulate
## version: 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 05:59:00) [MSC v.1929 64 bit (AMD64)]
## Architecture: 64bit
## numpy: C:/Users/rchang/AppData/Local/r-miniconda/envs/r-reticulate/Lib/site-packages/numpy
## numpy_version: 1.23.4
##
## NOTE: Python version was forced by RETICULATE_PYTHON
##Chunk R
# Attach reticulate and point it at the interpreter inside the r-reticulate
# conda environment.
# NOTE(review): the py_config() output above reports that the interpreter was
# forced by RETICULATE_PYTHON, so this call may be redundant -- confirm.
library(reticulate)
use_python(
  python = "C:/Users/rchang/AppData/Local/r-miniconda/envs/r-reticulate/python.exe"
)
Primero, verifica que el entorno “r-reticulate” esté instalado y configurado correctamente en tu sistema. Luego, busca su ruta en la terminal o bash:
sh Copiar código
conda env list
##Chunk R
# Select the r-reticulate conda environment (identified here by the path to
# its python binary); required = TRUE asks reticulate to raise an error if
# that Python cannot be used.
use_condaenv(
  condaenv = "C:/Users/rchang/AppData/Local/r-miniconda/envs/r-reticulate/python.exe",
  required = TRUE
)
# py_install(c("matplotlib", "seaborn", "scipy", "scikit-learn", "statsmodels"))
# (package is named "scikit-learn", as in the conda commands further down)
# If py_install() does not work, try the alternative described next.
Instalar las Bibliotecas por Separado:
Intenta instalar cada biblioteca por separado utilizando conda en tu terminal o símbolo del sistema. Aquí te dejo los comandos para cada una:
conda install --yes --prefix "C:/Users/rchang/AppData/Local/r-miniconda/envs/r-reticulate" -c conda-forge matplotlib
conda install --yes --prefix "C:/Users/rchang/AppData/Local/r-miniconda/envs/r-reticulate" -c conda-forge seaborn
conda install --yes --prefix "C:/Users/rchang/AppData/Local/r-miniconda/envs/r-reticulate" -c conda-forge scipy
conda install --yes --prefix "C:/Users/rchang/AppData/Local/r-miniconda/envs/r-reticulate" scikit-learn
conda install --yes --prefix "C:/Users/rchang/AppData/Local/r-miniconda/envs/r-reticulate" -c conda-forge statsmodels
##Chunk python
# Import each library installed above; presumably a smoke test that the
# environment is usable, since nothing is done with the modules in this chunk.
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import sklearn
import statsmodels
##Chunk python
import pandas as pd

# Build a small example dataset: two integer columns and one string column
data = dict(
    A=[1, 2, 3],
    B=[4, 5, 6],
    C=['X', 'Y', 'Z'],
)
df = pd.DataFrame(data)

# Show the first rows of the dataset
print(df.head())
## A B C
## 0 1 4 X
## 1 2 5 Y
## 2 3 6 Z
##Chunk python
# Data handling
import pandas as pd
import numpy as np
# Plotting
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
# Preprocessing and modelling
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Matplotlib configuration
plt.rcParams['image.cmap'] = "bwr"
#plt.rcParams['figure.dpi'] = "100"
plt.rcParams['savefig.bbox'] = "tight"
# Apply the ggplot style exactly once. The previous form,
# `style.use(...) or plt.style.use(...)`, ran both calls because the first
# returns None, applying the same style sheet twice.
style.use('ggplot')
# Warning configuration
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation and convergence warnings -- confirm that is intended.
warnings.filterwarnings('ignore')
# --- PYTHON ---
# Data: one entry per team, kept as three parallel lists of equal length.
# NOTE(review): "Chicago" appears twice -- presumably two distinct teams; confirm.
equipos = [
    "Texas", "Boston", "Detroit", "Kansas", "St.", "New_S.",
    "New_Y.", "Milwaukee", "Colorado", "Houston", "Baltimore", "Los_An.",
    "Chicago", "Cincinnati", "Los_P.", "Philadelphia", "Chicago", "Cleveland",
    "Arizona", "Toronto", "Minnesota", "Florida", "Pittsburgh", "Oakland",
    "Tampa", "Atlanta", "Washington", "San.F", "San.I", "Seattle",
]
bateos = [
    5659, 5710, 5563, 5672, 5532, 5600,
    5518, 5447, 5544, 5598, 5585, 5436,
    5549, 5612, 5513, 5579, 5502, 5509,
    5421, 5559, 5487, 5508, 5421, 5452,
    5436, 5528, 5441, 5486, 5417, 5421,
]
runs = [
    855, 875, 787, 730, 762, 718,
    867, 721, 735, 615, 708, 644,
    654, 735, 667, 713, 654, 704,
    731, 743, 619, 625, 610, 645,
    707, 641, 624, 570, 593, 556,
]
# Assemble the columns into a DataFrame, in the order equipos, bateos, runs
datos = pd.DataFrame(dict(equipos=equipos, bateos=bateos, runs=runs))
# Bare expression so the notebook displays the table
datos
## equipos bateos runs
## 0 Texas 5659 855
## 1 Boston 5710 875
## 2 Detroit 5563 787
## 3 Kansas 5672 730
## 4 St. 5532 762
## 5 New_S. 5600 718
## 6 New_Y. 5518 867
## 7 Milwaukee 5447 721
## 8 Colorado 5544 735
## 9 Houston 5598 615
## 10 Baltimore 5585 708
## 11 Los_An. 5436 644
## 12 Chicago 5549 654
## 13 Cincinnati 5612 735
## 14 Los_P. 5513 667
## 15 Philadelphia 5579 713
## 16 Chicago 5502 654
## 17 Cleveland 5509 704
## 18 Arizona 5421 731
## 19 Toronto 5559 743
## 20 Minnesota 5487 619
## 21 Florida 5508 625
## 22 Pittsburgh 5421 610
## 23 Oakland 5452 645
## 24 Tampa 5436 707
## 25 Atlanta 5528 641
## 26 Washington 5441 624
## 27 San.F 5486 570
## 28 San.I 5417 593
## 29 Seattle 5421 556
##Chunk python
# Data handling
import pandas as pd
import numpy as np
# Plotting
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
# Preprocessing and modelling
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Matplotlib configuration
plt.rcParams['image.cmap'] = "bwr"
plt.rcParams['savefig.bbox'] = "tight"
# Apply the ggplot style exactly once. The previous form,
# `style.use(...) or plt.style.use(...)`, ran both calls because the first
# returns None, applying the same style sheet twice.
style.use('ggplot')
# Warning configuration
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation and convergence warnings -- confirm that is intended.
warnings.filterwarnings('ignore')
# Data: one entry per team, kept as three parallel lists of equal length.
# NOTE(review): "Chicago" appears twice -- presumably two distinct teams; confirm.
equipos = [
    "Texas", "Boston", "Detroit", "Kansas", "St.",
    "New_S.", "New_Y.", "Milwaukee", "Colorado", "Houston",
    "Baltimore", "Los_An.", "Chicago", "Cincinnati", "Los_P.",
    "Philadelphia", "Chicago", "Cleveland", "Arizona", "Toronto",
    "Minnesota", "Florida", "Pittsburgh", "Oakland", "Tampa",
    "Atlanta", "Washington", "San.F", "San.I", "Seattle",
]
bateos = [
    5659, 5710, 5563, 5672, 5532, 5600, 5518, 5447, 5544, 5598,
    5585, 5436, 5549, 5612, 5513, 5579, 5502, 5509, 5421, 5559,
    5487, 5508, 5421, 5452, 5436, 5528, 5441, 5486, 5417, 5421,
]
runs = [
    855, 875, 787, 730, 762, 718, 867, 721, 735, 615,
    708, 644, 654, 735, 667, 713, 654, 704, 731, 743,
    619, 625, 610, 645, 707, 641, 624, 570, 593, 556,
]
# Assemble the columns into a DataFrame, in the order equipos, bateos, runs
datos = pd.DataFrame(dict(equipos=equipos, bateos=bateos, runs=runs))
# Scatter plot of runs against bateos with a fitted regression line,
# drawn on an explicitly created 10x6 figure.
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    data=datos,
    x='bateos',
    y='runs',
    scatter_kws={'s': 50},       # marker size
    line_kws={'color': 'red'},   # regression line colour
    ax=ax,
)
# Override the default axis labels (the column names) and add a title
ax.set_title('Relación entre Bateos y Runs')
ax.set_xlabel('Bateos')
ax.set_ylabel('Runs')
ax.grid(True)
plt.show()