# Titanic(Kaggle)-Logistic（2）

Logistic简介

Logistic回归和线性回归的区别

pytorch版本

## Logistic简介

Logistic是一种广义的线性回归，主要是用来处理二分类问题。

## 损失函数

我在做题中也尝试了用均方误差作为损失函数进行了实验。均方误差就是预测值和实际值之差的平方的平均值。

Logistic回归用到的公式：sigmoid 函数 $\sigma(h) = \dfrac{1}{1+e^{-h}}$，以及交叉熵损失 $J(w) = -\dfrac{1}{m}\sum_{i=1}^{m}\big[y^{(i)}\log \hat{y}^{(i)} + (1-y^{(i)})\log(1-\hat{y}^{(i)})\big]$。

## 完整代码

```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Split features from the label; PassengerId is an identifier, not a feature.
# NOTE(review): train_df / test_df are assumed to be the preprocessed frames
# from part 1 of this series — they are not defined in this snippet; confirm.
X_train = train_df.drop(['Survived', 'PassengerId'], axis=1)
Y_train = train_df["Survived"]
X_test = test_df.drop(['PassengerId'], axis=1)
# Number of training rows and number of feature columns.
samples_size, feature_size = X_train.shape[0], X_train.shape[1]

def linreg(X, w):
    """Linear part of the model: the matrix product of X and the weights w."""
    return X @ w

def sigmoid(h):
    """Squash logits into (0, 1) probabilities via the logistic function."""
    exp_neg = np.exp(-h)
    return 1.0 / (1.0 + exp_neg)

def sgd(x, y, w, lr=0.01, num_epochs=5000):
    """Fit logistic-regression weights with full-batch gradient descent.

    Args:
        x: (n, d) feature matrix (ndarray or DataFrame).
        y: length-n vector of 0/1 labels.
        w: (d, 1) initial weight column vector.
        lr: learning rate.
        num_epochs: number of full-batch updates.

    Returns:
        (w, costs): the trained weights and the cross-entropy cost recorded
        once per epoch.
    """
    # BUG FIX: use the size of the batch actually passed in, not the
    # module-level global `samples_size` (which is the training-set size).
    n = x.shape[0]
    y = np.mat(y).transpose()  # column vector (n, 1)
    costs = []
    for epoch in range(num_epochs):
        # Forward pass.
        h = linreg(x, w)
        y_hat = sigmoid(h)
        # Cross-entropy cost; the (1,n) x (n,1) matrix products reduce to sums.
        cost = (- 1 / n) * np.sum(
            y.transpose() * np.log(y_hat) + (1 - y).transpose() * (np.log(1 - y_hat))
        )
        cost = np.squeeze(cost)

        # Backward pass: gradient of the cross-entropy w.r.t. w.
        dz = (y_hat - y)
        dw = (np.dot(x.transpose(), dz)) / n
        w = w - lr * dw

        # BUG FIX: cost was appended both here and again inside the
        # `epoch % 100` branch, duplicating every 100th point on the loss
        # curve; record it exactly once per epoch.
        costs.append(cost)
        if epoch % 100 == 0:
            print("迭代的次数: %i ， 误差值： %f" % (epoch, cost))

    return w, costs

def predict(w, X):
    """Return hard 0/1 predictions for X, as a (1, n) row vector.

    BUG FIX: the output buffer was sized with the module-level global
    `samples_size` (the training-set row count) while the loop ran over
    `y_hat.shape[0]`; that breaks whenever X is not the training set
    (e.g. the test set). Size everything from X itself.
    """
    n = X.shape[0]
    Y_pred = np.zeros((1, n))
    y_hat = sigmoid(linreg(X, w))
    for i in range(n):
        # Threshold the predicted survival probability at 0.5.
        if y_hat[i, 0] > 0.5:
            Y_pred[0, i] = 1
    return Y_pred

# Initialise every weight to 1 and fit with full-batch gradient descent.
w = np.ones((feature_size, 1))
w, costs = sgd(X_train, Y_train, w)

# Show the learned weights and the training curve (cost per recorded step).
print(w)
plt.plot(costs)
plt.title("training loss")
plt.show()

# Training-set accuracy: mean absolute difference between hard predictions
# and labels is the error rate; 100 minus that (in %) is the accuracy.
Y_pred = predict(w, X_train)
print("训练集准确性：", format(100 - np.mean(np.abs(Y_pred - np.mat(Y_train).transpose())) * 100), "%")
``````

### pytorch版本

```python
import torch
import pandas as pd
import numpy as np

# Split features from the label; PassengerId is an identifier, not a feature.
# NOTE(review): train_df / test_df are assumed to come from the earlier
# preprocessing step — they are not defined in this snippet; confirm.
X_train = train_df.drop(['Survived', 'PassengerId'], axis=1)
Y_train = train_df["Survived"]
X_test = test_df.drop(['PassengerId'], axis=1)

# Convert to tensors; float32 is what torch.nn.Linear expects.
X_train = torch.from_numpy(X_train.to_numpy()).float()
Y_train = torch.from_numpy(Y_train.to_numpy()).float()
X_test = torch.from_numpy(X_test.to_numpy())
# Number of training rows and number of feature columns.
samples_size, feature_size = X_train.shape[0], X_train.shape[1]

class Model(torch.nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid."""

    def __init__(self, n_features=7):
        """Build the model.

        Args:
            n_features: number of input columns. Defaults to 7 (the Titanic
                feature count used in this post) so existing `Model()` calls
                are unchanged; pass a different value to reuse the model on
                other feature sets.
        """
        super(Model, self).__init__()
        self.linear = torch.nn.Linear(n_features, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return P(label == 1) for each row of x, shape (n, 1)."""
        y_pred = self.sigmoid(self.linear(x))
        return y_pred

model = Model()

# Binary cross-entropy matches the sigmoid output of the model.
loss = torch.nn.BCELoss(reduction='mean')
opt = torch.optim.SGD(model.parameters(), lr=0.01)

# BUG FIX: BCELoss requires input and target to have the same shape;
# the model outputs (n, 1) while Y_train is (n,), so reshape the target.
target = Y_train.view(-1, 1)

for epoch in range(500):
    y_pred = model(X_train)

    l = loss(y_pred, target)
    if epoch % 100 == 0:
        print(f'epoch {epoch}, loss {l}')

    # BUG FIX: PyTorch accumulates gradients across iterations; without
    # zero_grad() every step applies the sum of all previous gradients.
    opt.zero_grad()
    l.backward()
    opt.step()

# Show the learned parameters (weight vector and bias).
for i in model.parameters():
    print(i)

# Threshold the probabilities at 0.5 in place to get hard 0/1 predictions.
# NOTE(review): this mutates y_pred, a tensor still attached to the autograd
# graph — fine here because no backward() follows, but fragile if reordered.
for i in range(y_pred.shape[0]):
    y_pred[i, 0] = 1 if y_pred[i, 0] > 0.5 else 0

# Detach from the graph and convert to numpy for the accuracy computation.
y_pred = y_pred.detach().numpy()
print("训练集准确性：", format(100 - np.mean(np.abs(y_pred - np.mat(Y_train).transpose())) * 100), "%")
``````

THE END
