Go Machine Learning

1.准备工作

1.1 基本知识

• gonum: "gonum.org/v1/gonum"

• gota: "github.com/go-gota/gota"

• sklearn: "github.com/pa-m/sklearn"

• plot: "gonum.org/v1/plot"

2. 正式开始

2.1 数据读取部分

// Open the data file; on failure log.Fatal exits the program.
// NOTE(review): `filename` and `irisDF` are not defined in this snippet —
// presumably the dataframe is loaded from `file` in an omitted line
// (e.g. dataframe.ReadCSV(file)); confirm against the full example.
file, err := os.Open(filename)
if err != nil {
log.Fatal(err)
}

// Close the file when the surrounding function returns.
defer file.Close()

// Print the loaded dataframe for a quick sanity check.
fmt.Println(irisDF)


// PrintCSV reads the CSV file at filename and prints every record to
// standard output, one record per line with fields separated by spaces.
// Any open or parse error terminates the program via log.Fatal.
func PrintCSV(filename string) {
	f, err := os.Open(filename)
	if err != nil {
		log.Fatal(err)
	}
	// FIX: the file was never closed in the original.
	defer f.Close()

	// FIX: the csv.Reader `r` was used but never created in the original,
	// which did not compile.
	r := csv.NewReader(f)
	records, err := r.ReadAll()
	if err != nil {
		log.Fatal(err)
	}

	for _, record := range records {
		for _, field := range record {
			// fmt is preferred over the builtin print/println,
			// which write to stderr and are not guaranteed to stay.
			fmt.Print(field, " ")
		}
		fmt.Println()
	}
}


2.3 基本demo

2.3.1 回归

// PlotImg draws one scatter plot per column of the package-level DataFrame
// DF against its "Sales" column and saves each as "<col>_scatter.jpg".
// NOTE(review): filename is opened/closed but the data comes from DF,
// which is presumably loaded elsewhere — confirm against the caller.
func PlotImg(filename string) {
	file, err := os.Open(filename)
	if err != nil {
		log.Fatal(err)
	}

	defer file.Close()

	// The y-axis is always the Sales column.
	target := DF.Col("Sales").Float()

	for _, colName := range DF.Names() {
		pts := make(plotter.XYs, DF.Nrow())

		for i, floatVal := range DF.Col(colName).Float() {
			pts[i].X = floatVal
			pts[i].Y = target[i]
		}

		p := plot.New()
		p.X.Label.Text = colName
		p.Y.Label.Text = "y"

		s, err := plotter.NewScatter(pts)
		if err != nil {
			log.Fatal(err)
		}
		// FIX: the scatter was created but never added to the plot,
		// leaving `s` unused (a compile error) and the images empty.
		p.Add(s)

		if err := p.Save(4*vg.Inch, 4*vg.Inch, colName+"_scatter.jpg"); err != nil {
			log.Fatal(err)
		}
	}
}


sklearn的模型输入是gonum中的mat.Matrix,但是我们是dataframe读取的数据,并不是同一种类型,这里就是静态类型的麻烦之处.不过还好mat.Matrix只是一个interface,所以为了能够转换,我们实现接口相应的方法就好.

// matrix adapts a gota DataFrame to gonum's mat.Matrix interface so that
// dataframe data can be fed directly to sklearn models. The embedded
// DataFrame already provides Dims(); only At and T are added here.
type matrix struct {
dataframe.DataFrame
}

// At returns the element at row i, column j converted to float64.
func (m matrix) At(i, j int) float64 {
return m.Elem(i, j).Float()
}

// T returns the transpose using gonum's lazy Transpose wrapper.
func (m matrix) T() mat.Matrix {
return mat.Transpose{m}
}


package p3

import (
"fmt"
"github.com/go-gota/gota/dataframe"
"github.com/pa-m/sklearn/linear_model"
"github.com/pa-m/sklearn/metrics"
modelselection "github.com/pa-m/sklearn/model_selection"
"gonum.org/v1/gonum/mat"
"gonum.org/v1/plot"
"gonum.org/v1/plot/plotter"
"gonum.org/v1/plot/vg"
"image/color"
"log"
"os"
)

// for convert dataframe to matrix
// create nessesary implementations

// matrix adapts a gota DataFrame to gonum's mat.Matrix interface so that
// dataframe data can be fed directly to sklearn models. The embedded
// DataFrame already provides Dims(); only At and T are added here.
type matrix struct {
dataframe.DataFrame
}

// At returns the element at row i, column j converted to float64.
func (m matrix) At(i, j int) float64 {
return m.Elem(i, j).Float()
}

// T returns the transpose using gonum's lazy Transpose wrapper.
func (m matrix) T() mat.Matrix {
return mat.Transpose{m}
}

// sales=w*TV+b
func LinearDemo(Isplot bool) {
file, err := os.Open(filename)
if err != nil {
log.Fatal(err)
}

defer file.Close()

X := mat.DenseCopyOf(&matrix{DF.Select([]string{"TV"})})
Y := mat.DenseCopyOf(&matrix{DF.Select([]string{"Sales"})})
XTrain, XTest, YTrain, YTest := modelselection.TrainTestSplit(X, Y, 0.2, 0)

lr := linearmodel.NewLinearRegression()
lr.Fit(XTrain, YTrain)

NPred, _ := XTest.Dims()
Pred := mat.NewDense(NPred, 1, nil)

lr.Predict(XTest, Pred)

fmt.Printf("Coefficients: %.3fn", mat.Formatted(lr.Coef))
fmt.Printf("Coefficients: %.3fn", mat.Formatted(lr.Intercept))
fmt.Printf("the pred result is: Sales=%.3f *TV + %.3fn", mat.Formatted(lr.Intercept), mat.Formatted(lr.Coef))
fmt.Printf("Mean squared error: %.2fn", metrics.MeanSquaredError(YTest, Pred, nil, "").At(0, 0))
fmt.Printf("Mean absolute error: %.2fn", metrics.MeanAbsoluteError(YTest, Pred, nil, "").At(0, 0))
fmt.Printf("Variance score: %.2fn", metrics.R2Score(YTest, Pred, nil, "").At(0, 0))

if Isplot {
//predict all
NPred, _ = X.Dims()
Pred := mat.NewDense(NPred, 1, nil)
lr.Predict(X, Pred)

p := plot.New()
xys := func(X, Y mat.Matrix) plotter.XYs {
var data plotter.XYs
for sample := 0; sample < NPred; sample++ {
data = append(data, struct{ X, Y float64 }{X.At(sample, 0), Y.At(sample, 0)})
}
return data
}

s, _ := plotter.NewScatter(xys(X, Y))
l, _ := plotter.NewLine(xys(X, Pred))
l.Color = color.RGBA{0, 0, 255, 255}

// Save the plot to a PNG file.
pngfile := "linearregression.png"
os.Remove(pngfile)
if err := p.Save(4*vg.Inch, 3*vg.Inch, pngfile); err != nil {
panic(err)
}
}
}



2.3.2 分类

2.3.2.1 KNN&&SVM

iris.csv的内容如下

// replaceLabel maps the iris species names in col to numeric class ids
// (setosa=0, versicolor=1, virginica=2) and returns them as a float series.
// Unknown labels map to 0.0 (the map's zero value).
func replaceLabel(col series.Series) series.Series {
	classID := map[string]float64{
		"Iris-setosa":     0.0,
		"Iris-versicolor": 1.0,
		"Iris-virginica":  2.0,
	}

	out := series.New([]float64{}, series.Float, "")
	for i, n := 0, col.Len(); i < n; i++ {
		out.Append(classID[col.Elem(i).String()])
	}
	return out
}


package p3

import (
"fmt"
"github.com/go-gota/gota/dataframe"
"github.com/go-gota/gota/series"
"github.com/pa-m/sklearn/metrics"
modelselection "github.com/pa-m/sklearn/model_selection"
"github.com/pa-m/sklearn/neighbors"
"github.com/pa-m/sklearn/svm"
"gonum.org/v1/gonum/mat"
"log"
"os"
)

// replaceLabel maps the iris species names in col to numeric class ids
// (setosa=0, versicolor=1, virginica=2) and returns them as a float series.
// Unknown labels map to 0.0 (the map's zero value).
func replaceLabel(col series.Series) series.Series {
	classID := map[string]float64{
		"Iris-setosa":     0.0,
		"Iris-versicolor": 1.0,
		"Iris-virginica":  2.0,
	}

	out := series.New([]float64{}, series.Float, "")
	for i, n := 0, col.Len(); i < n; i++ {
		out.Append(classID[col.Elem(i).String()])
	}
	return out
}

// ClassifyDemo trains a KNN classifier and a linear SVC on the iris data
// held in the package-level DataFrame DF and prints their test accuracy.
// NOTE(review): iris.csv is opened here but the data is read from DF,
// presumably loaded elsewhere — confirm.
func ClassifyDemo() {
	filename := "iris.csv"
	file, err := os.Open(filename)
	if err != nil {
		log.Fatal(err)
	}

	defer file.Close()

	// Features are columns 0, 1, 3; label column 4 is remapped from the
	// species strings to floats via replaceLabel.
	X := mat.DenseCopyOf(&matrix{DF.Select([]int{0, 1, 3})})
	Y := mat.DenseCopyOf(&matrix{DF.Select(4).Capply(replaceLabel)})
	XTrain, XTest, YTrain, YTest := modelselection.TrainTestSplit(X, Y, 0.2, 0)

	clf := neighbors.NewKNeighborsClassifier(3, "uniform")
	clf.Fit(XTrain, YTrain)

	NPred, _ := XTest.Dims()
	Pred := mat.NewDense(NPred, 1, nil)
	clf.Predict(XTest, Pred)

	// FIX: the format strings had lost their \n escapes ("%%n" printed a
	// literal 'n' instead of a newline).
	fmt.Printf("The accuracy of KNN: %.02f%%\n", metrics.AccuracyScore(YTest, Pred, true, nil)*100)

	// SVC is inherently binary; without a one-vs-rest strategy its
	// accuracy on this 3-class problem is expected to be poor.
	clf2 := svm.NewSVC()
	//clf2.Degree = 3
	clf2.Kernel = "linear"
	clf2.Fit(XTrain, YTrain)
	clf2.Predict(XTest, Pred)
	//fmt.Printf("Pred:\n%g\n", mat.Formatted(Pred))
	fmt.Printf("The accuracy of SVM: %.02f%%\n", metrics.AccuracyScore(YTest, Pred, true, nil)*100)
}



KNN本来就是用于多分类问题,所以效果还可以;svm只是二分类,没有用上一对多等策略,所以预测的大多都是某一类导致准确率很差;

2.3.2.2 MLP

1. 构造简单网络,定义输入,隐藏层和输出,中间加入激活函数
2. 正向传播,计算网络输出与目标的损失
3. 根据损失函数反向传播计算梯度,对网络中的权重和偏置项进行更新

// neuralNet holds the parameters of a single-hidden-layer MLP;
// the weight/bias fields are nil until train has been called.
type neuralNet struct {
config  neuralNetConfig
wHidden *mat.Dense // input->hidden weights (inputNum x hiddenNum)
bHidden *mat.Dense // hidden-layer bias (1 x hiddenNum)
wOut    *mat.Dense // hidden->output weights (hiddenNum x outputNum)
bOut    *mat.Dense // output-layer bias (1 x outputNum)
}

// neuralNetConfig defines the network architecture and the
// training hyperparameters.
type neuralNetConfig struct {
inputNum     int     // number of input features
outputNum    int     // number of output units
hiddenNum    int     // number of hidden units
Epochs       int     // full-batch training iterations
learningRate float64 // gradient-descent step size
}

// NewNetwork returns an untrained network with the given configuration.
func NewNetwork(config neuralNetConfig) *neuralNet {
return &neuralNet{config: config}
}


损失函数取 $\frac{1}{2}(y-\hat{y})^2$，对其求导得

$\left(\frac{1}{2}(y-\hat{y})^2\right)' = y-\hat{y}$

即梯度可以直接用误差 $y-\hat{y}$ 表示。sigmoid 的导数也可以预先计算设置好。修改后的相关代码如下

// train fits the single-hidden-layer MLP with full-batch gradient descent.
// x is the (N, inputNum) feature matrix and y the (N, outputNum) target
// matrix. On success the learned parameters are stored on nn.
func (nn *neuralNet) train(x, y *mat.Dense) error {
	N, _ := x.Dims()
	randSource := rand.NewSource(time.Now().UnixNano())
	randGen := rand.New(randSource)

	// Initialize all parameters uniformly in (0, 1).
	wHiddenRaw := make([]float64, nn.config.hiddenNum*nn.config.inputNum)
	bHiddenRaw := make([]float64, nn.config.hiddenNum)
	wOutRaw := make([]float64, nn.config.outputNum*nn.config.hiddenNum)
	bOutRaw := make([]float64, nn.config.outputNum)

	for _, param := range [][]float64{wHiddenRaw, bHiddenRaw, wOutRaw, bOutRaw} {
		for i := range param {
			param[i] = randGen.Float64()
		}
	}

	wHidden := mat.NewDense(nn.config.inputNum, nn.config.hiddenNum, wHiddenRaw)
	bHidden := mat.NewDense(1, nn.config.hiddenNum, bHiddenRaw)
	wOut := mat.NewDense(nn.config.hiddenNum, nn.config.outputNum, wOutRaw)
	bOut := mat.NewDense(1, nn.config.outputNum, bOutRaw)

	output := mat.NewDense(N, nn.config.outputNum, nil)
	// train model.
	for i := 0; i < nn.config.Epochs; i++ {
		// Forward pass: hidden = sigmoid(x*wHidden + bHidden).
		hiddenLayerInput := mat.NewDense(N, nn.config.hiddenNum, nil)
		hiddenLayerInput.Mul(x, wHidden)
		addBHidden := func(_, col int, v float64) float64 { return v + bHidden.At(0, col) }
		// FIX: addBHidden was defined but never applied in the original.
		hiddenLayerInput.Apply(addBHidden, hiddenLayerInput)

		hiddenLayerActivations := mat.NewDense(N, nn.config.hiddenNum, nil)
		applySigmoid := func(_, _ int, v float64) float64 { return Sigmoid(v) }
		hiddenLayerActivations.Apply(applySigmoid, hiddenLayerInput)

		// output = sigmoid(hidden*wOut + bOut).
		outputLayerInput := mat.NewDense(N, nn.config.outputNum, nil)
		outputLayerInput.Mul(hiddenLayerActivations, wOut)
		addBOut := func(_, col int, v float64) float64 { return v + bOut.At(0, col) }
		// FIX: addBOut was defined but never applied in the original.
		outputLayerInput.Apply(addBOut, outputLayerInput)
		output.Apply(applySigmoid, outputLayerInput)

		// Backpropagation: error * sigmoid'(activation) at each layer.
		// SigmoidPrime takes the activation value, not the pre-activation.
		networkError := mat.NewDense(N, nn.config.outputNum, nil)
		networkError.Sub(y, output)

		slopeOutputLayer := mat.NewDense(N, nn.config.outputNum, nil)
		applySigmoidPrime := func(_, _ int, v float64) float64 { return SigmoidPrime(v) }
		slopeOutputLayer.Apply(applySigmoidPrime, output)
		slopeHiddenLayer := mat.NewDense(N, nn.config.hiddenNum, nil)
		slopeHiddenLayer.Apply(applySigmoidPrime, hiddenLayerActivations)

		dOutput := mat.NewDense(N, nn.config.outputNum, nil)
		dOutput.MulElem(networkError, slopeOutputLayer)
		errorAtHiddenLayer := mat.NewDense(N, nn.config.hiddenNum, nil)
		errorAtHiddenLayer.Mul(dOutput, wOut.T())

		dHiddenLayer := mat.NewDense(N, nn.config.hiddenNum, nil)
		dHiddenLayer.MulElem(errorAtHiddenLayer, slopeHiddenLayer)

		// Adjust the parameters.
		// FIX: the original allocated the adjustment matrices but never
		// computed the gradients nor applied any update (bOutAdj and
		// bHiddenAdj were also unused), so the network never learned.
		wOutAdj := mat.NewDense(nn.config.hiddenNum, nn.config.outputNum, nil)
		wOutAdj.Mul(hiddenLayerActivations.T(), dOutput)
		wOutAdj.Scale(nn.config.learningRate, wOutAdj)
		wOut.Add(wOut, wOutAdj)

		bOutAdj, err := sumAlongAxis(0, dOutput)
		if err != nil {
			return err
		}
		bOutAdj.Scale(nn.config.learningRate, bOutAdj)
		bOut.Add(bOut, bOutAdj)

		wHiddenAdj := mat.NewDense(nn.config.inputNum, nn.config.hiddenNum, nil)
		wHiddenAdj.Mul(x.T(), dHiddenLayer)
		wHiddenAdj.Scale(nn.config.learningRate, wHiddenAdj)
		wHidden.Add(wHidden, wHiddenAdj)

		bHiddenAdj, err := sumAlongAxis(0, dHiddenLayer)
		if err != nil {
			return err
		}
		bHiddenAdj.Scale(nn.config.learningRate, bHiddenAdj)
		bHidden.Add(bHidden, bHiddenAdj)
	}

	nn.wHidden = wHidden
	nn.bHidden = bHidden
	nn.wOut = wOut
	nn.bOut = bOut

	return nil
}

// predict runs a forward pass over x and returns the (N, outputNum)
// network output. It errors if the network has not been trained yet.
func (nn *neuralNet) predict(x *mat.Dense) (*mat.Dense, error) {
	N, _ := x.Dims()
	// Check to make sure that our neuralNet value
	// represents a trained model.
	if nn.wHidden == nil || nn.wOut == nil || nn.bHidden == nil || nn.bOut == nil {
		// FIX: "neurnal" typo in the error message.
		return nil, errors.New("the supplied neural net weights and biases are empty")
	}

	// Define the output of the neural network.
	output := mat.NewDense(N, nn.config.outputNum, nil)

	// Forward pass: hidden = sigmoid(x*wHidden + bHidden).
	hiddenLayerInput := mat.NewDense(N, nn.config.hiddenNum, nil)
	hiddenLayerInput.Mul(x, nn.wHidden)
	addBHidden := func(_, col int, v float64) float64 { return v + nn.bHidden.At(0, col) }
	// FIX: addBHidden was defined but never applied in the original.
	hiddenLayerInput.Apply(addBHidden, hiddenLayerInput)

	hiddenLayerActivations := mat.NewDense(N, nn.config.hiddenNum, nil)
	applySigmoid := func(_, _ int, v float64) float64 { return Sigmoid(v) }
	hiddenLayerActivations.Apply(applySigmoid, hiddenLayerInput)

	// output = sigmoid(hidden*wOut + bOut).
	outputLayerInput := mat.NewDense(N, nn.config.outputNum, nil)
	outputLayerInput.Mul(hiddenLayerActivations, nn.wOut)
	addBOut := func(_, col int, v float64) float64 { return v + nn.bOut.At(0, col) }
	// FIX: addBOut was defined but never applied in the original.
	outputLayerInput.Apply(addBOut, outputLayerInput)
	output.Apply(applySigmoid, outputLayerInput)

	return output, nil
}

// Sigmoid is the logistic function 1/(1+e^(-x)), mapping x into (0, 1).
func Sigmoid(x float64) float64 {
	denom := 1.0 + math.Exp(-x)
	return 1.0 / denom
}

// SigmoidPrime is the sigmoid derivative expressed in terms of the
// sigmoid's OUTPUT value x: s'(z) = s(z)*(1-s(z)).
func SigmoidPrime(x float64) float64 {
	complement := 1.0 - x
	return x * complement
}


// replaceLabel2 one-hot encodes the iris species column (a one-column
// DataFrame) into a three-column DataFrame: setosa -> (1,0,0),
// versicolor -> (0,1,0), virginica -> (0,0,1).
// Unknown labels produce an empty record (the map's zero value).
func replaceLabel2(col dataframe.DataFrame) dataframe.DataFrame {
	rows, _ := col.Dims()
	changeMap := map[string][]string{
		"Iris-setosa":     {"1.", "0.", "0."},
		"Iris-versicolor": {"0.", "1.", "0."},
		"Iris-virginica":  {"0.", "0.", "1."},
	}

	NewDF := make([][]string, 0, rows)
	for i := 0; i < rows; i++ {
		NewDF = append(NewDF, changeMap[col.Elem(i, 0).String()])
	}

	// FIX: the original declared a DataFrame result but had no return
	// statement, which does not compile. NewDF carries no header row.
	return dataframe.LoadRecords(NewDF, dataframe.HasHeader(false))
}


package p3

import (
"errors"
"fmt"
"github.com/go-gota/gota/dataframe"
"github.com/pa-m/sklearn/metrics"
modelselection "github.com/pa-m/sklearn/model_selection"
"gonum.org/v1/gonum/floats"
"gonum.org/v1/gonum/mat"
"log"
"math"
"math/rand"
"os"
"time"
)

// neuralNet holds the parameters of a single-hidden-layer MLP;
// the weight/bias fields are nil until train has been called.
type neuralNet struct {
config  neuralNetConfig
wHidden *mat.Dense // input->hidden weights (inputNum x hiddenNum)
bHidden *mat.Dense // hidden-layer bias (1 x hiddenNum)
wOut    *mat.Dense // hidden->output weights (hiddenNum x outputNum)
bOut    *mat.Dense // output-layer bias (1 x outputNum)
}

// neuralNetConfig defines the network architecture and the
// training hyperparameters.
type neuralNetConfig struct {
inputNum     int     // number of input features
outputNum    int     // number of output units
hiddenNum    int     // number of hidden units
Epochs       int     // full-batch training iterations
learningRate float64 // gradient-descent step size
}

// NewNetwork returns an untrained network with the given configuration.
func NewNetwork(config neuralNetConfig) *neuralNet {
return &neuralNet{config: config}
}

// train fits the single-hidden-layer MLP with full-batch gradient descent.
// x is the (N, inputNum) feature matrix and y the (N, outputNum) target
// matrix. On success the learned parameters are stored on nn.
func (nn *neuralNet) train(x, y *mat.Dense) error {
	N, _ := x.Dims()
	randSource := rand.NewSource(time.Now().UnixNano())
	randGen := rand.New(randSource)

	// Initialize all parameters uniformly in (0, 1).
	wHiddenRaw := make([]float64, nn.config.hiddenNum*nn.config.inputNum)
	bHiddenRaw := make([]float64, nn.config.hiddenNum)
	wOutRaw := make([]float64, nn.config.outputNum*nn.config.hiddenNum)
	bOutRaw := make([]float64, nn.config.outputNum)

	for _, param := range [][]float64{wHiddenRaw, bHiddenRaw, wOutRaw, bOutRaw} {
		for i := range param {
			param[i] = randGen.Float64()
		}
	}

	wHidden := mat.NewDense(nn.config.inputNum, nn.config.hiddenNum, wHiddenRaw)
	bHidden := mat.NewDense(1, nn.config.hiddenNum, bHiddenRaw)
	wOut := mat.NewDense(nn.config.hiddenNum, nn.config.outputNum, wOutRaw)
	bOut := mat.NewDense(1, nn.config.outputNum, bOutRaw)

	output := mat.NewDense(N, nn.config.outputNum, nil)
	// train model.
	for i := 0; i < nn.config.Epochs; i++ {
		// Forward pass: hidden = sigmoid(x*wHidden + bHidden).
		hiddenLayerInput := mat.NewDense(N, nn.config.hiddenNum, nil)
		hiddenLayerInput.Mul(x, wHidden)
		addBHidden := func(_, col int, v float64) float64 { return v + bHidden.At(0, col) }
		// FIX: addBHidden was defined but never applied in the original.
		hiddenLayerInput.Apply(addBHidden, hiddenLayerInput)

		hiddenLayerActivations := mat.NewDense(N, nn.config.hiddenNum, nil)
		applySigmoid := func(_, _ int, v float64) float64 { return Sigmoid(v) }
		hiddenLayerActivations.Apply(applySigmoid, hiddenLayerInput)

		// output = sigmoid(hidden*wOut + bOut).
		outputLayerInput := mat.NewDense(N, nn.config.outputNum, nil)
		outputLayerInput.Mul(hiddenLayerActivations, wOut)
		addBOut := func(_, col int, v float64) float64 { return v + bOut.At(0, col) }
		// FIX: addBOut was defined but never applied in the original.
		outputLayerInput.Apply(addBOut, outputLayerInput)
		output.Apply(applySigmoid, outputLayerInput)

		// Backpropagation: error * sigmoid'(activation) at each layer.
		// SigmoidPrime takes the activation value, not the pre-activation.
		networkError := mat.NewDense(N, nn.config.outputNum, nil)
		networkError.Sub(y, output)

		slopeOutputLayer := mat.NewDense(N, nn.config.outputNum, nil)
		applySigmoidPrime := func(_, _ int, v float64) float64 { return SigmoidPrime(v) }
		slopeOutputLayer.Apply(applySigmoidPrime, output)
		slopeHiddenLayer := mat.NewDense(N, nn.config.hiddenNum, nil)
		slopeHiddenLayer.Apply(applySigmoidPrime, hiddenLayerActivations)

		dOutput := mat.NewDense(N, nn.config.outputNum, nil)
		dOutput.MulElem(networkError, slopeOutputLayer)
		errorAtHiddenLayer := mat.NewDense(N, nn.config.hiddenNum, nil)
		errorAtHiddenLayer.Mul(dOutput, wOut.T())

		dHiddenLayer := mat.NewDense(N, nn.config.hiddenNum, nil)
		dHiddenLayer.MulElem(errorAtHiddenLayer, slopeHiddenLayer)

		// Adjust the parameters.
		// FIX: the original allocated the adjustment matrices but never
		// computed the gradients nor applied any update (bOutAdj and
		// bHiddenAdj were also unused), so the network never learned.
		wOutAdj := mat.NewDense(nn.config.hiddenNum, nn.config.outputNum, nil)
		wOutAdj.Mul(hiddenLayerActivations.T(), dOutput)
		wOutAdj.Scale(nn.config.learningRate, wOutAdj)
		wOut.Add(wOut, wOutAdj)

		bOutAdj, err := sumAlongAxis(0, dOutput)
		if err != nil {
			return err
		}
		bOutAdj.Scale(nn.config.learningRate, bOutAdj)
		bOut.Add(bOut, bOutAdj)

		wHiddenAdj := mat.NewDense(nn.config.inputNum, nn.config.hiddenNum, nil)
		wHiddenAdj.Mul(x.T(), dHiddenLayer)
		wHiddenAdj.Scale(nn.config.learningRate, wHiddenAdj)
		wHidden.Add(wHidden, wHiddenAdj)

		bHiddenAdj, err := sumAlongAxis(0, dHiddenLayer)
		if err != nil {
			return err
		}
		bHiddenAdj.Scale(nn.config.learningRate, bHiddenAdj)
		bHidden.Add(bHidden, bHiddenAdj)
	}

	nn.wHidden = wHidden
	nn.bHidden = bHidden
	nn.wOut = wOut
	nn.bOut = bOut

	return nil
}

// predict runs a forward pass over x and returns the (N, outputNum)
// network output. It errors if the network has not been trained yet.
func (nn *neuralNet) predict(x *mat.Dense) (*mat.Dense, error) {
	N, _ := x.Dims()
	// Check to make sure that our neuralNet value
	// represents a trained model.
	if nn.wHidden == nil || nn.wOut == nil || nn.bHidden == nil || nn.bOut == nil {
		// FIX: "neurnal" typo in the error message.
		return nil, errors.New("the supplied neural net weights and biases are empty")
	}

	// Define the output of the neural network.
	output := mat.NewDense(N, nn.config.outputNum, nil)

	// Forward pass: hidden = sigmoid(x*wHidden + bHidden).
	hiddenLayerInput := mat.NewDense(N, nn.config.hiddenNum, nil)
	hiddenLayerInput.Mul(x, nn.wHidden)
	addBHidden := func(_, col int, v float64) float64 { return v + nn.bHidden.At(0, col) }
	// FIX: addBHidden was defined but never applied in the original.
	hiddenLayerInput.Apply(addBHidden, hiddenLayerInput)

	hiddenLayerActivations := mat.NewDense(N, nn.config.hiddenNum, nil)
	applySigmoid := func(_, _ int, v float64) float64 { return Sigmoid(v) }
	hiddenLayerActivations.Apply(applySigmoid, hiddenLayerInput)

	// output = sigmoid(hidden*wOut + bOut).
	outputLayerInput := mat.NewDense(N, nn.config.outputNum, nil)
	outputLayerInput.Mul(hiddenLayerActivations, nn.wOut)
	addBOut := func(_, col int, v float64) float64 { return v + nn.bOut.At(0, col) }
	// FIX: addBOut was defined but never applied in the original.
	outputLayerInput.Apply(addBOut, outputLayerInput)
	output.Apply(applySigmoid, outputLayerInput)

	return output, nil
}

// Sigmoid is the logistic function 1/(1+e^(-x)), mapping x into (0, 1).
func Sigmoid(x float64) float64 {
	denom := 1.0 + math.Exp(-x)
	return 1.0 / denom
}

// SigmoidPrime is the sigmoid derivative expressed in terms of the
// sigmoid's OUTPUT value x: s'(z) = s(z)*(1-s(z)).
func SigmoidPrime(x float64) float64 {
	complement := 1.0 - x
	return x * complement
}

// sumAlongAxis sums m along the given axis: axis 0 collapses rows into a
// (1, cols) matrix of column sums; axis 1 collapses columns into a
// (rows, 1) matrix of row sums. Any other axis is an error.
func sumAlongAxis(axis int, m *mat.Dense) (*mat.Dense, error) {
	rows, cols := m.Dims()
	switch axis {
	case 0:
		sums := make([]float64, cols)
		for j := range sums {
			sums[j] = floats.Sum(mat.Col(nil, j, m))
		}
		return mat.NewDense(1, cols, sums), nil
	case 1:
		sums := make([]float64, rows)
		for i := range sums {
			sums[i] = floats.Sum(mat.Row(nil, i, m))
		}
		return mat.NewDense(rows, 1, sums), nil
	default:
		return nil, errors.New("invalid axis, must be 0 or 1")
	}
}

// MaxAlongAxis one-hot encodes each row of m: the column holding the row
// maximum becomes 1, every other column 0. The error result is always nil
// and is kept only for call-site symmetry with sumAlongAxis.
func MaxAlongAxis(m *mat.Dense) (*mat.Dense, error) {
	numRows, numCols := m.Dims()
	// Pre-size: the result always has exactly numRows*numCols entries.
	res := make([]float64, 0, numRows*numCols)
	for i := 0; i < numRows; i++ {
		row := mat.Row(nil, i, m)
		idx := floats.MaxIdx(row)
		for j := 0; j < numCols; j++ {
			if j == idx {
				res = append(res, 1)
			} else {
				res = append(res, 0)
			}
		}
	}
	return mat.NewDense(numRows, numCols, res), nil
}

// replaceLabel2 one-hot encodes the iris species column (a one-column
// DataFrame) into a three-column DataFrame: setosa -> (1,0,0),
// versicolor -> (0,1,0), virginica -> (0,0,1).
// Unknown labels produce an empty record (the map's zero value).
func replaceLabel2(col dataframe.DataFrame) dataframe.DataFrame {
	rows, _ := col.Dims()
	changeMap := map[string][]string{
		"Iris-setosa":     {"1.", "0.", "0."},
		"Iris-versicolor": {"0.", "1.", "0."},
		"Iris-virginica":  {"0.", "0.", "1."},
	}

	NewDF := make([][]string, 0, rows)
	for i := 0; i < rows; i++ {
		NewDF = append(NewDF, changeMap[col.Elem(i, 0).String()])
	}

	// FIX: the original declared a DataFrame result but had no return
	// statement, which does not compile. NewDF carries no header row.
	return dataframe.LoadRecords(NewDF, dataframe.HasHeader(false))
}

// NNClassify trains the hand-rolled MLP on the iris data (4 features,
// 3 one-hot classes), prints the learned parameters, and reports test
// accuracy. NOTE(review): iris.csv is opened here but the data is read
// from the package-level DF, presumably loaded elsewhere — confirm.
func NNClassify() {
	config := neuralNetConfig{
		inputNum:     4,
		outputNum:    3,
		hiddenNum:    10,
		Epochs:       1000,
		learningRate: 0.009,
	}

	filename := "iris.csv"
	file, err := os.Open(filename)
	if err != nil {
		log.Fatal(err)
	}

	defer file.Close()

	// Features are columns 0-3; label column 4 is one-hot encoded.
	X := mat.DenseCopyOf(&matrix{DF.Select([]int{0, 1, 2, 3})})
	Y := mat.DenseCopyOf(&matrix{replaceLabel2(DF.Select(4))})

	XTrain, XTest, YTrain, YTest := modelselection.TrainTestSplit(X, Y, 0.33, 0)
	//fmt.Printf("X_train:\n%g\n", mat.Formatted(XTrain))
	//fmt.Printf("Y_train:\n%g\n", mat.Formatted(YTrain))
	network := NewNetwork(config)
	start := time.Now()
	if err := network.train(XTrain, YTrain); err != nil {
		log.Fatal(err)
	}
	fmt.Println("Train cost time:", time.Since(start))

	// FIX: the format strings below had lost their \n escapes.
	f := mat.Formatted(network.wHidden, mat.Prefix(" "))
	fmt.Printf("\nwHidden = % v\n\n", f)

	f = mat.Formatted(network.bHidden, mat.Prefix(" "))
	fmt.Printf("\nbHidden = % v\n\n", f)

	f = mat.Formatted(network.wOut, mat.Prefix(" "))
	fmt.Printf("\nwOut = % v\n\n", f)

	f = mat.Formatted(network.bOut, mat.Prefix(" "))
	fmt.Printf("\nbOut = % v\n\n", f)

	// predict
	predictions, err := network.predict(XTest)
	// FIX: the original used predictions before checking err.
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("\npredictions = % v\n\n", mat.Formatted(predictions))

	// Collapse the sigmoid outputs to one-hot rows before scoring.
	pred, err := MaxAlongAxis(predictions)
	if err != nil {
		log.Fatal(err)
	}
	//fmt.Println(mat.Formatted(pred))
	fmt.Printf("The accuracy of MLP: %.02f%%\n", metrics.AccuracyScore(YTest, pred, true, nil)*100)

}


THE END