import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
path = 'ex1data1.txt'
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])
data.head()
## Population Profit
## 0 6.1101 17.5920
## 1 5.5277 9.1302
## 2 8.5186 13.6620
## 3 7.0032 11.8540
## 4 5.8598 6.8233
data.describe()
## Population Profit
## count 97.000000 97.000000
## mean 8.159800 5.839135
## std 3.869884 5.510262
## min 5.026900 -2.680700
## 25% 5.707700 1.986900
## 50% 6.589400 4.562300
## 75% 8.578100 7.046700
## max 22.203000 24.147000
data.plot(kind='scatter', x='Population', y='Profit', figsize=(12,8))
plt.show()
Cost Function:
\[J(\theta)=\frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)^2\]
where:
\[h_\theta(x)=\theta^{T}x=\theta_0x_0+\theta_1x_1+\theta_2x_2+\dots+\theta_nx_n\]
def computeCost(X, y, theta):
    # sum of squared errors between predictions X * theta^T and targets y,
    # divided by 2m so the gradient carries no stray constant
    inner = np.power(((X * theta.T) - y), 2)
    return np.sum(inner) / (2 * len(X))
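As a quick sanity check (an illustrative example of my own, not part of the exercise data), the cost of a tiny two-point dataset can be verified by hand:
# illustrative check: x = [1, 2], y = [1, 2], theta = [0, 0]
# h_theta is identically zero, so J = (1^2 + 2^2) / (2 * 2) = 1.25
X_check = np.matrix([[1.0, 1.0], [1.0, 2.0]])
y_check = np.matrix([[1.0], [2.0]])
computeCost(X_check, y_check, np.matrix([0.0, 0.0]))
## 1.25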
data.insert(0, 'Ones', 1)  # add a bias column of ones for the intercept term
data.head()
## Ones Population Profit
## 0 1 6.1101 17.5920
## 1 1 5.5277 9.1302
## 2 1 8.5186 13.6620
## 3 1 7.0032 11.8540
## 4 1 5.8598 6.8233
# set X (training data) and y (target variable)
cols = data.shape[1]
X = data.iloc[:,0:cols-1]   # X: all rows, every column except the last
y = data.iloc[:,cols-1:cols] # y: all rows, the last column only
X.head()
## Ones Population
## 0 1 6.1101
## 1 1 5.5277
## 2 1 8.5186
## 3 1 7.0032
## 4 1 5.8598
y.head()
## Profit
## 0 17.5920
## 1 9.1302
## 2 13.6620
## 3 11.8540
## 4 6.8233
# convert the data from arrays to matrices
X = np.matrix(X.values)
y = np.matrix(y.values)
theta = np.matrix(np.array([0,0]))
X.shape, y.shape, theta.shape
## ((97, 2), (97, 1), (1, 2))
computeCost(X, y, theta)
## 32.072733877455676
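Since theta is initialized to zeros, the hypothesis is identically zero and the cost above is simply the sum of squared targets over 2m; a one-line check confirms this:
np.sum(np.power(y, 2)) / (2 * len(y))
## 32.072733877455676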
\[\theta_j := \theta_j - \alpha\frac{\partial}{\partial\theta_j}J(\theta)\]
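For the squared-error cost defined above, the partial derivative works out to a sum over the training examples, which is exactly what the inner loop of the implementation below computes:
\[\theta_j := \theta_j - \frac{\alpha}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)x_j^{(i)}\]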
def gradientDescent(X, y, theta, alpha, iters):
    temp = np.matrix(np.zeros(theta.shape))   # staging area for the simultaneous update
    parameters = int(theta.ravel().shape[1])  # number of parameters to fit
    cost = np.zeros(iters)                    # cost history, one entry per iteration
    for i in range(iters):
        error = (X * theta.T) - y             # h_theta(x) - y for every example
        for j in range(parameters):
            term = np.multiply(error, X[:,j])
            temp[0,j] = theta[0,j] - ((alpha / len(X)) * np.sum(term))
        theta = temp
        cost[i] = computeCost(X, y, theta)
    return theta, cost
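The loop over j can also be collapsed into a single matrix operation. A minimal vectorized sketch (the function name is my own) that is equivalent to the implementation above:
def gradientDescentVectorized(X, y, theta, alpha, iters):
    # simultaneous update of all parameters at once:
    # theta := theta - (alpha / m) * ((X * theta^T) - y)^T * X
    cost = np.zeros(iters)
    for i in range(iters):
        theta = theta - (alpha / len(X)) * ((X * theta.T) - y).T * X
        cost[i] = computeCost(X, y, theta)
    return theta, cost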
alpha = 0.01
iters = 1000
g, cost = gradientDescent(X, y, theta, alpha, iters)
g
## matrix([[-3.24140214, 1.1272942 ]])
computeCost(X, y, g)
## 4.515955503078914
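The fitted parameters can be used directly for point predictions; for example, for a hypothetical city with Population = 7 on this scale (an illustrative value, not taken from the dataset):
g[0, 0] + g[0, 1] * 7  # roughly -3.24 + 1.13 * 7, about 4.65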
x = np.linspace(data.Population.min(), data.Population.max(), 100)
f = g[0, 0] + (g[0, 1] * x)
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data.Population, data.Profit, label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(np.arange(iters), cost, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')
plt.show()
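alpha = 0.01 works well here, but it is worth seeing how the learning rate affects convergence. A short sketch with a few candidate rates (the candidate values are my own, chosen small enough to converge on this unnormalized data):
fig, ax = plt.subplots(figsize=(12,8))
for a in [0.001, 0.003, 0.01]:
    _, c = gradientDescent(X, y, theta, a, iters)
    ax.plot(np.arange(iters), c, label='alpha = {}'.format(a))
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.legend()
plt.show()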
path = 'ex1data2.txt'
data2 = pd.read_csv(path, header=None, names=['Size', 'Bedrooms', 'Price'])
data2.head()
## Size Bedrooms Price
## 0 2104 3 399900
## 1 1600 3 329900
## 2 2400 3 369000
## 3 1416 2 232000
## 4 3000 4 539900
data2.describe()
## Size Bedrooms Price
## count 47.000000 47.000000 47.000000
## mean 2000.680851 3.170213 340412.659574
## std 794.702354 0.760982 125039.899586
## min 852.000000 1.000000 169900.000000
## 25% 1432.000000 3.000000 249900.000000
## 50% 1888.000000 3.000000 299900.000000
## 75% 2269.000000 4.000000 384450.000000
## max 4478.000000 5.000000 699900.000000
data2 = (data2 - data2.mean()) / data2.std()
data2.head()
## Size Bedrooms Price
## 0 0.130010 -0.223675 0.475747
## 1 -0.504190 -0.223675 -0.084074
## 2 0.502476 -0.223675 0.228626
## 3 -0.735723 -1.537767 -0.867025
## 4 1.257476 1.090417 1.595389
data2.describe()
## Size Bedrooms Price
## count 4.700000e+01 4.700000e+01 4.700000e+01
## mean 3.779483e-17 2.746030e-16 -9.684924e-17
## std 1.000000e+00 1.000000e+00 1.000000e+00
## min -1.445423e+00 -2.851859e+00 -1.363666e+00
## 25% -7.155897e-01 -2.236752e-01 -7.238702e-01
## 50% -1.417900e-01 -2.236752e-01 -3.239979e-01
## 75% 3.376348e-01 1.090417e+00 3.521863e-01
## max 3.117292e+00 2.404508e+00 2.874981e+00
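One caveat with z-score normalization: the training mean and standard deviation must be kept so that new inputs (and predicted prices) can be mapped to and from the original units. Since data2 was normalized in place above, a sketch that recovers the statistics by re-reading the file (the mu/sigma names are my own):
raw2 = pd.read_csv(path, header=None, names=['Size', 'Bedrooms', 'Price'])
mu, sigma = raw2.mean(), raw2.std()  # per-column statistics in original units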
# add ones column
data2.insert(0, 'Ones', 1)
# set X (training data) and y (target variable)
cols = data2.shape[1]
X2 = data2.iloc[:,0:cols-1]
y2 = data2.iloc[:,cols-1:cols]
# convert to matrices and initialize theta
X2 = np.matrix(X2.values)
y2 = np.matrix(y2.values)
theta2 = np.matrix(np.array([0,0,0]))
# perform linear regression on the data set
g2, cost2 = gradientDescent(X2, y2, theta2, alpha, iters)
# get the cost (error) of the model
computeCost(X2, y2, g2)
## 0.1307033696077189
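Because the model was trained on normalized features, an illustrative prediction must normalize its inputs with the mu/sigma captured above and de-normalize the output; the house below (1650 sq ft, 3 bedrooms) is a hypothetical example:
size_n = (1650 - mu['Size']) / sigma['Size']
beds_n = (3 - mu['Bedrooms']) / sigma['Bedrooms']
price_n = g2[0, 0] + g2[0, 1] * size_n + g2[0, 2] * beds_n
price_n * sigma['Price'] + mu['Price']  # predicted price in original dollars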
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(np.arange(iters), cost2, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')
plt.show()
from sklearn import linear_model
model = linear_model.LinearRegression()
model.fit(X, y)
## LinearRegression()
##
## /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:593: FutureWarning: np.matrix usage is deprecated in 1.0 and will raise a TypeError in 1.2. Please convert to a numpy array with np.asarray. For more information see: https://numpy.org/doc/stable/reference/generated/numpy.matrix.html
## warnings.warn(
x = np.array(X[:, 1].A1)        # Population column flattened to a 1-D array
f = model.predict(X).flatten()  # fitted values from the scikit-learn model
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data.Population, data.Profit, label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()
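The fitted parameters live in intercept_ and coef_. Since fit_intercept=True is scikit-learn's default, the Ones column ends up with a zero weight and the bias is absorbed by the intercept; the values should land close to the normal-equation solution computed next:
model.intercept_, model.coef_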
# Normal equation
def normalEqn(X, y):
    # closed-form least squares solution: theta = (X^T X)^{-1} X^T y
    theta = np.linalg.inv(X.T@X)@X.T@y  # X.T@X is equivalent to X.T.dot(X)
    return theta
final_theta2 = normalEqn(X, y)  # note: this differs a little from the theta found by batch gradient descent
final_theta2
## matrix([[-3.89578088],
## [ 1.19303364]])
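The gap between this closed-form theta and the gradient-descent result above is just incomplete convergence at 1000 iterations; rerunning gradient descent for longer should close it (10000 is an illustrative choice):
g_more, _ = gradientDescent(X, y, theta, alpha, 10000)
g_more  # should approach the normal-equation values above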