# Bind this R session to the "r-reticulate" conda environment. With
# required = TRUE reticulate raises an error if that environment cannot be
# found instead of falling back to some other Python.
reticulate::use_condaenv("r-reticulate", required = TRUE)

# Install the Python packages in one installer run, pinned to the same conda
# environment selected above. A single call resolves all four packages
# together and avoids launching the installer once per package.
reticulate::py_install(
  packages = c("pandas", "numpy", "matplotlib", "seaborn"),
  envname = "r-reticulate",
  method = "conda"
)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the mtcars data set into the workspace and print a compact,
# column-by-column overview of it.
data("mtcars")
# glimpse() is not a base R function and no package is attached in this
# script, so it is called through its namespace, matching the `reticulate::`
# style used for the other package calls in this file.
dplyr::glimpse(mtcars)
## Observations: 32
## Variables: 11
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8…
## $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8…
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 1…
## $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 18…
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92…
## $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3…
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 1…
## $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0…
## $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0…
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3…
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2…
# Pull the R object `mtcars` into Python through reticulate's `r` accessor
# and wrap it in a pandas DataFrame.
df_py = pd.DataFrame(r.mtcars)

# Mean mpg for each distinct `cyl` value. The aggregation is named by the
# string "mean" rather than passed as np.mean: recent pandas releases emit a
# FutureWarning when a NumPy reducer is handed to agg(), and the string form
# keeps the current result.
df_plot = df_py.groupby("cyl").agg({"mpg": "mean"})

# One bar per cylinder group: group labels on the x axis, mean mpg as the
# bar height. The column is read by key so it can never be shadowed by a
# DataFrame attribute of the same name.
plt.bar(df_plot.index, df_plot["mpg"])
## <BarContainer object of 3 artists>
# Display the matplotlib figure(s) created so far.
plt.show()

# Descriptive statistics for every column, transposed so that each variable
# becomes a row and each statistic a column.
df_py.describe().transpose()
## count mean std ... 50% 75% max
## mpg 32.0 20.090625 6.026948 ... 19.200 22.80 33.900
## cyl 32.0 6.187500 1.785922 ... 6.000 8.00 8.000
## disp 32.0 230.721875 123.938694 ... 196.300 326.00 472.000
## hp 32.0 146.687500 68.562868 ... 123.000 180.00 335.000
## drat 32.0 3.596563 0.534679 ... 3.695 3.92 4.930
## wt 32.0 3.217250 0.978457 ... 3.325 3.61 5.424
## qsec 32.0 17.848750 1.786943 ... 17.710 18.90 22.900
## vs 32.0 0.437500 0.504016 ... 0.000 1.00 1.000
## am 32.0 0.406250 0.498991 ... 0.000 1.00 1.000
## gear 32.0 3.687500 0.737804 ... 4.000 4.00 5.000
## carb 32.0 2.812500 1.615200 ... 2.000 4.00 8.000
##
## [11 rows x 8 columns]
# Copy the pandas DataFrame `df_py` from the Python session back into R.
# `py` is exported by reticulate, which this script never attaches, so it is
# referenced through the namespace like the other reticulate calls here.
df_r <- reticulate::py$df_py

# Per-column summary statistics of the data after its round trip through
# Python.
summary(df_r)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000