手推一元线性回归（附代码）

线性回归

用例说明

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Sample data: body weight of a child at each age; the two lists are
# index-aligned (weights[i] was measured at ages[i]).
weights = [10.05, 12.54, 14.65, 16.64, 18.98]
ages = [1, 2, 3, 4, 5]

# Build the frame in one step, with 'age' as the first column and 'weight' as the second.
df = pd.DataFrame({'age': ages, 'weight': weights})

# Scatter plot of weight against age.
# s: marker size, c: marker colour (e.g. c=np.squeeze(colors)).
df.plot.scatter(x='age', y='weight', c=None, s=15)
title_font = {'family': 'KaiTi'}  # KaiTi (regular-script) font
plt.title(u'儿童年龄体重对照', fontsize=15, fontdict=title_font)
plt.show()


y = ax + b


$$f(x) = 2.25x + 7.73$$

$$\hat{y}_1 = f(x_1) = 2.25 \times 1 + 7.73 = 9.98$$

$$\hat{y}_2 = f(x_2) = 2.25 \times 2 + 7.73 = 12.23$$

$$\hat{y}_3 = f(x_3) = 2.25 \times 3 + 7.73 = 14.48$$

$$\hat{y}_4 = f(x_4) = 2.25 \times 4 + 7.73 = 16.73$$

$$\hat{y}_5 = f(x_5) = 2.25 \times 5 + 7.73 = 18.98$$

$\hat{\theta}$

##使用程序计算

# Fitted values: evaluate the line f(x) = 2.25x + 7.73 at every age in df.
y_head5_predict = [2.25 * age + 7.73 for age in df['age']]


$$f(x) = 2.25x + 7.73$$

[9.98, 12.23, 14.48, 16.73, 18.98]


[ 9.98, 12.23, 14.48, 16.73, 18.98]
[10.05, 12.54, 14.65, 16.64, 18.98]


最小二乘法

损失函数

残差公式

$$e = f(x_i) - y_i$$

$$\hat{y}_i - y_i = \epsilon$$

损失函数原型

和方差（SSE）

$$SSE = \sum_{i=1}^{m}(y_i - \hat{y}_i)^2$$

均方误差（MSE）

$$MSE = \frac{SSE}{N} = \frac{1}{N}\sum_{i=1}^{m}(y_i - \hat{y}_i)^2$$

均方根（RMSE）

$$RMSE = \sqrt{MSE} = \sqrt{\frac{1}{N}\sum_{i=1}^{m}(y_i - \hat{y}_i)^2}$$

参数估计——最小二乘法

$$f(x) = ax + b$$

$$f(a,b) = \sum_{i=1}^{m}(y_i - \hat{y}_i)^2 = \sum_{i=1}^{m}\bigl(y_i - (ax_i + b)\bigr)^2$$

$f(a,b)$

对参数a的求导过程

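$$u = y_i - (ax_i + b)$$

$$\frac{\partial}{\partial a}\left(\sum_{i=1}^{m}\bigl(y_i - (ax_i + b)\bigr)^2\right) = \sum_{i=1}^{m} 2\bigl(y_i - (ax_i + b)\bigr)\,\frac{\partial}{\partial a}\bigl(y_i - (ax_i + b)\bigr)$$

$$= \sum_{i=1}^{m} 2\bigl(y_i - (ax_i + b)\bigr)\left(\frac{\partial y_i}{\partial a} - \frac{\partial (ax_i)}{\partial a} - \frac{\partial b}{\partial a}\right)$$

$$= 2\sum_{i=1}^{m}\bigl(y_i - (ax_i + b)\bigr)(0 - x_i - 0)$$

$$= 2\sum_{i=1}^{m}(ax_i + b - y_i)\,x_i$$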

对参数b的求导过程

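$$u = y_i - (ax_i + b)$$

$$\frac{\partial}{\partial b}\left(\sum_{i=1}^{m}\bigl(y_i - (ax_i + b)\bigr)^2\right) = \sum_{i=1}^{m} 2\bigl(y_i - (ax_i + b)\bigr)\,\frac{\partial}{\partial b}\bigl(y_i - (ax_i + b)\bigr)$$

$$= \sum_{i=1}^{m} 2\bigl(y_i - (ax_i + b)\bigr)\left(\frac{\partial y_i}{\partial b} - \frac{\partial (ax_i)}{\partial b} - \frac{\partial b}{\partial b}\right)$$

$$= 2\sum_{i=1}^{m}\bigl(y_i - (ax_i + b)\bigr)(0 - 0 - 1)$$

$$= 2\sum_{i=1}^{m}(ax_i + b - y_i)$$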

估算参数

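$$\begin{cases}
\dfrac{\partial \epsilon}{\partial a} = 2\sum_{i=1}^{m}(ax_i + b - y_i)\,x_i = 0 \\[2ex]
\dfrac{\partial \epsilon}{\partial b} = 2\sum_{i=1}^{m}(ax_i + b - y_i) = 0
\end{cases}$$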

(长时间没用过高数，只能用笨办法)

$$\sum_{i=1}^{m}(ax_i + b - y_i)\,x_i = 0$$

$$\sum_{i=1}^{m}(ax_i^2 + bx_i - y_ix_i) = 0$$

$$\sum_{i=1}^{m}ax_i^2 + \sum_{i=1}^{m}bx_i - \sum_{i=1}^{m}y_ix_i = 0$$

$$a\sum_{i=1}^{m}x_i^2 + b\sum_{i=1}^{m}x_i - \sum_{i=1}^{m}y_ix_i = 0$$

$$\sum_{i=1}^{m}(ax_i + b - y_i) = 0$$

$$\sum_{i=1}^{m}ax_i + \sum_{i=1}^{m}b - \sum_{i=1}^{m}y_i = 0$$

$$a\sum_{i=1}^{m}x_i + mb - \sum_{i=1}^{m}y_i = 0$$

$$b = \frac{\sum_{i=1}^{m}y_i}{m} - a\,\frac{\sum_{i=1}^{m}x_i}{m} = \bar{y} - a\bar{x}$$

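$$a\sum_{i=1}^{m}x_i^2 + (\bar{y} - a\bar{x})\sum_{i=1}^{m}x_i - \sum_{i=1}^{m}y_ix_i = 0$$

$$a\sum_{i=1}^{m}x_i^2 + \bar{y}\sum_{i=1}^{m}x_i - a\bar{x}\sum_{i=1}^{m}x_i - \sum_{i=1}^{m}y_ix_i = 0$$

$$a\left(\sum_{i=1}^{m}x_i^2 - \bar{x}\sum_{i=1}^{m}x_i\right) + \bar{y}\sum_{i=1}^{m}x_i - \sum_{i=1}^{m}y_ix_i = 0$$

$$a = \frac{\sum_{i=1}^{m}y_ix_i - \bar{y}\sum_{i=1}^{m}x_i}{\sum_{i=1}^{m}x_i^2 - \bar{x}\sum_{i=1}^{m}x_i}$$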

# Estimate the line's parameters with the closed-form least-squares solution:
#   a = (sum(x_i * y_i) - y_bar * sum(x_i)) / (sum(x_i ** 2) - x_bar * sum(x_i))
#   b = y_bar - a * x_bar
x_bar = np.mean(ages)
y_bar = np.mean(weights)
sum_x = np.sum(ages)  # appears in both the numerator and the denominator
slope_num = np.dot(ages, weights) - y_bar * sum_x
slope_den = np.sum(np.square(ages)) - x_bar * sum_x
a_param = slope_num / slope_den
b_param = y_bar - a_param * x_bar


2.1960000000000037


7.9839999999999876

估算过程补充说明
1. 公式

$$\Large a=\frac{\sum_{i=1}^{m}y_ix_i-\bar{y}\sum_{i=1}^{m}x_i}{\sum_{i=1}^{m}x_i^2-\bar{x}\sum_{i=1}^{m}x_i}$$

这个公式若使用手算还可以再转化：

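$$a = \frac{\sum_{i=1}^{m}y_ix_i - \frac{1}{m}\sum_{i=1}^{m}y_i\sum_{i=1}^{m}x_i}{\sum_{i=1}^{m}x_i^2 - \bar{x}\sum_{i=1}^{m}x_i}$$

$$a = \frac{\sum_{i=1}^{m}y_ix_i - \bar{x}\sum_{i=1}^{m}y_i}{\sum_{i=1}^{m}x_i^2 - \bar{x}\sum_{i=1}^{m}x_i}$$

$$a = \frac{\sum_{i=1}^{m}y_i(x_i - \bar{x})}{\sum_{i=1}^{m}x_i^2 - \bar{x}\sum_{i=1}^{m}x_i}$$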

2. 代码说明
np.dot(ages, weights)


$$a = [a_1, a_2, a_3, \dots, a_n]$$

$$b = [b_1, b_2, b_3, \dots, b_n]$$

$$a \cdot b = a_1b_1 + a_2b_2 + a_3b_3 + \dots + a_nb_n$$

$$\sum_{i=1}^{m}y_ix_i$$

用程序验证手推结果

# Cross-check the hand-derived least-squares result with scikit-learn.
# scikit-learn expects 2-D input, so each list is reshaped into a
# single-column matrix first.
x_train = np.reshape(ages, (-1, 1))
y_train = np.reshape(weights, (-1, 1))
lr = LinearRegression().fit(x_train, y_train)  # fit() returns the estimator itself
print("斜率:", lr.coef_)
print("截距:", lr.intercept_)


斜率: [[2.196]]



$$f(x_i) = 2.196x_i + 7.984$$

多元线性回归

$$f(x) = ax + b$$

$$f(x_i) = \omega_1x_{i1} + \omega_2x_{i2} + \dots + \omega_dx_{id} + b$$

ϵ

$$y = \beta_0 + \beta_1x_1 + \beta_2x_2 + \dots + \beta_kx_k + \epsilon$$

ω

ω

$$f(x_i) = \omega^{T}x_i + b$$

ω

# Add height as a second input feature.
ages_highs = [
    [1, 2, 3, 4, 5],                   # age
    [76.5, 88.5, 96.8, 104.1, 111.3],  # standard height
]
# Transpose so that each row is one sample: (age, height).
x_train = np.transpose(np.array(ages_highs))
y_train = np.reshape(weights, (-1, 1))
lr = LinearRegression().fit(x_train, y_train)  # fit() returns the estimator itself
print("斜率:", lr.coef_)
print("截距:", lr.intercept_)
# Predict the weight for age 6 and height 117.7.
lr.predict([[6, 117.7]])


斜率: [[1.67268574 0.06142186]]

array([[20.95730786]])


THE END