# Install with reticulate::py_install(packages = "scikit-learn"); the package name is "scikit-learn", not "sklearn".
from sklearn import neighbors, datasets, preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load the iris data set and separate the feature matrix from the labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out a test split; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Train a 5-nearest-neighbours classifier on the standardized features.
knn = neighbors.KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
## Recorded output:
## KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
## metric_params=None, n_jobs=None, n_neighbors=5, p=2,
## weights='uniform')

# Predict the held-out labels and compute the accuracy on them.
y_pred = knn.predict(X_test)
accuracy_score(y_test, y_pred)
## Recorded output:
## 0.8947368421052632
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the raw features again; X_train / X_test are rebound to unscaled data.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33)

# Standardization: learn the scaling on the training split, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
from sklearn.preprocessing import Normalizer

# Normalization: the transformer has to be instantiated before it is fitted.
# Calling ``Normalizer.fit(X_train)`` on the class itself binds X_train to
# ``self`` and fails with a TypeError because the data argument is missing.
scaler = Normalizer().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
from sklearn.preprocessing import Binarizer

# Binarization: fit a Binarizer (default settings) on the training split and
# apply the same transformer to both splits.
scaler = Binarizer()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
from sklearn.preprocessing import LabelEncoder

# Encode the target labels: learn the label set, then map y onto it.
enc = LabelEncoder()
enc.fit(y)
y = enc.transform(y)
# There are many other preprocessing transformers available in sklearn.preprocessing.
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans

# --- Supervised estimators ---------------------------------------------------
# The ``normalize`` argument of LinearRegression was deprecated in
# scikit-learn 1.0 and removed in 1.2, where passing it raises a TypeError.
# Scale the features with a preprocessing transformer (e.g. StandardScaler)
# before fitting instead.
lr = LinearRegression()
svc = SVC(kernel="linear")
gnb = GaussianNB()
ran = RandomForestClassifier(n_estimators=20)

# --- Unsupervised estimator --------------------------------------------------
k_means = KMeans(n_clusters=3, random_state=0)
# Model fitting. The splits are named X_train / X_test (lower-case "t");
# the previous X_Train / X_Test spellings were undefined names.
knn.fit(X_train, y_train)   # supervised: needs features and labels
k_means.fit(X_train)        # unsupervised: an estimator is fitted, not called

# Prediction. Both results are bound to y_pred, so after these two lines
# y_pred holds the k-means cluster assignments.
# NOTE(review): cluster ids are arbitrary and need not match the class labels
# in y_test -- keep that in mind if y_pred is scored against y_test afterwards.
y_pred = knn.predict(X_test)
y_pred = k_means.predict(X_test)
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    mean_absolute_error,
)

# Each metric compares the true test labels with the current predictions.
# The return values are not stored; they are only visible when the statements
# are evaluated interactively.
accuracy_score(y_true=y_test, y_pred=y_pred)
classification_report(y_true=y_test, y_pred=y_pred)
confusion_matrix(y_true=y_test, y_pred=y_pred)
mean_absolute_error(y_true=y_test, y_pred=y_pred)
# To evaluate your models, use the sklearn.metrics module.
# ``svm`` was referenced without ever being imported (NameError); import the
# submodule here so this snippet works on its own.
from sklearn import svm
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of a linear-kernel SVM on the full data set.
# ``scores`` holds one score per fold.
clf = svm.SVC(kernel='linear', C=1, random_state=42)
scores = cross_val_score(clf, X, y, cv=5)
import numpy as np
from sklearn.model_selection import GridSearchCV

# Exhaustive search over every combination of the listed KNN settings.
params = {
    "n_neighbors": np.arange(1, 3),
    "metric": ["euclidean", "cityblock"],
}
grid = GridSearchCV(param_grid=params, estimator=knn)
grid.fit(X_train, y_train)
## Recorded output:
## GridSearchCV(cv=None, error_score=nan,
## estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
## metric='minkowski',
## metric_params=None, n_jobs=None,
## n_neighbors=5, p=2,
## weights='uniform'),
## iid='deprecated', n_jobs=None,
## param_grid={'metric': ['euclidean', 'cityblock'],
## 'n_neighbors': array([1, 2])},
## pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
## scoring=None, verbose=0)

# Best cross-validated score found by the search.
print(grid.best_score_)
## Recorded output:
## 0.9553359683794467

# Number of neighbours used by the best estimator.
print(grid.best_estimator_.n_neighbors)
## Recorded output:
## 1
from sklearn.model_selection import RandomizedSearchCV

# Randomized search: sample n_iter candidate settings from the distributions
# below and score each with 4-fold cross-validation.
params = {
    "n_neighbors": range(1, 5),
    "weights": ["uniform", "distance"],
}
grid = RandomizedSearchCV(
    estimator=knn,
    param_distributions=params,
    n_iter=8,
    cv=4,
    random_state=5,
)
grid.fit(X_train, y_train)
## Recorded output:
## RandomizedSearchCV(cv=4, error_score=nan,
## estimator=KNeighborsClassifier(algorithm='auto',
## leaf_size=30,
## metric='minkowski',
## metric_params=None,
## n_jobs=None, n_neighbors=5,
## p=2, weights='uniform'),
## iid='deprecated', n_iter=8, n_jobs=None,
## param_distributions={'n_neighbors': range(1, 5),
## 'weights': ['uniform', 'distance']},
## pre_dispatch='2*n_jobs', random_state=5, refit=True,
## return_train_score=False, scoring=None, verbose=0)

# Best cross-validated score found by the search.
print(grid.best_score_)
## Recorded output:
## 0.9642857142857143

# The estimator configured with the best sampled settings.
print(grid.best_estimator_)
## Recorded output:
## KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
## metric_params=None, n_jobs=None, n_neighbors=3, p=2,
## weights='uniform')
# Reference: https://scikit-learn.org/stable/supervised_learning.html#supervised-learning