# 一、NumPy的初步使用

``````python
#  数据的矩阵化
import numpy as np
# Build a 5x5 matrix from the sample records. The mixed int/float/bool
# entries are coerced to a single float dtype, so False/True are stored
# as 0.0/1.0.
# np.asmatrix replaces the np.mat alias, which was removed in NumPy 2.0.
data = np.asmatrix([
    [1, 200, 105, 3, False],
    [2, 165, 80, 2, False],
    [3, 184.5, 120, 2, False],
    [4, 116, 70.8, 1, False],
    [5, 270, 150, 4, True],
])

# Count the rows by iterating: iterating a matrix yields one row at a time.
row = 0
for line in data:
    row += 1
# NOTE(review): the loop indentation was lost in the original listing; the
# count is assumed to be printed once, after the loop - confirm.
print(row)
print(data.size)  # total number of elements (rows * columns)
print(data)
``````

# 二、Matplotlib包的使用–图形化数据处理

``````python
import numpy as np
import scipy.stats as stats
import pylab
# Sample records as a 5x5 matrix (mixed entries are coerced to float).
# np.asmatrix replaces the np.mat alias, which was removed in NumPy 2.0.
data = np.asmatrix([
    [1, 200, 105, 3, False],
    [2, 165, 80, 2, False],
    [3, 184.5, 120, 2, False],
    [4, 116, 70.8, 1, False],
    [5, 270, 150, 4, True],
])

# Collect the value at column index 1 of every record. Iterating a matrix
# yields 1xN row matrices, hence the two-index form row[0, 1].
coll = [row[0, 1] for row in data]

# Probability plot of the collected values (probplot compares against the
# normal distribution by default), drawn through pylab.
stats.probplot(coll, plot=pylab)
pylab.show()
``````

# 三、深度学习理论方法–相似度计算（可以跳过）

## 1、基于欧几里得距离的相似度计算

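d12表示用户1和用户2的相似度，那么就有：

$$d_{12} = \sqrt{\sum_{k=1}^{n} (x_{1k} - x_{2k})^2}$$

其中 $x_{1k}$、$x_{2k}$ 分别表示用户1和用户2在第 $k$ 个属性上的取值；距离越小，说明两个用户越相似。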

# 四、数据统计的可视化展示（以我们亳州市降水为例）

## 1、数据的四分位

设 n 为数据的个数，则：

- Q1的位置 = (n+1) × 0.25
- Q2的位置 = (n+1) × 0.50
- Q3的位置 = (n+1) × 0.75

``````python
from pylab import *
import pandas as pd
import matplotlib.pyplot as plot
# Raw string: in a normal literal "\U" starts a unicode escape (a
# SyntaxError here) and "\r" would turn into a carriage return.
filepath = r"C:\Users\AWAITXM\Desktop\rain.csv"
# The original listing used dataFile without ever loading it.
# NOTE(review): default read_csv options (first row is the header) are
# assumed - confirm against the actual layout of rain.csv.
dataFile = pd.read_csv(filepath)

# Per-column summary statistics.
summary = dataFile.describe()
print(summary)

# Box plot of the raw values; a 2-D array gives one box per column.
array = dataFile.iloc[:, :].values
boxplot(array)
plot.xlabel("year")
plot.ylabel("rain")
show()
``````

``````python
from pylab import *
import pandas as pd
import matplotlib.pyplot as plot
# Raw string: in a normal literal "\U" starts a unicode escape (a
# SyntaxError here) and "\r" would turn into a carriage return.
filepath = r"C:\Users\AWAITXM\Desktop\rain.csv"
# The original listing used dataFile without ever loading it.
# NOTE(review): default read_csv options (first row is the header) are
# assumed - confirm against the actual layout of rain.csv.
dataFile = pd.read_csv(filepath)
summary = dataFile.describe()

# Bounds used to scale the value at column index 12 into a colormap position.
minRings = -1
maxRings = 99
nrows = 11

# Draw one line per record (first nrows rows), coloured by its scaled
# column-12 value.
for i in range(nrows):
    dataRow = dataFile.iloc[i, 1:13]
    labelColor = (dataFile.iloc[i, 12] - minRings) / (maxRings - minRings)
    dataRow.plot(color=plot.cm.RdYlBu(labelColor), alpha=0.5)

# Label and show the figure once, after all rows have been drawn.
plot.xlabel("Attribute")
plot.ylabel("Score")
show()
``````

``````python
from pylab import *
import pandas as pd
import matplotlib.pyplot as plot
# Raw string: in a normal literal "\U" starts a unicode escape (a
# SyntaxError here) and "\r" would turn into a carriage return.
filepath = r"C:\Users\AWAITXM\Desktop\rain.csv"
# The original listing used dataFile without ever loading it.
# NOTE(review): default read_csv options (first row is the header) are
# assumed - confirm against the actual layout of rain.csv.
dataFile = pd.read_csv(filepath)
summary = dataFile.describe()

# Pairwise column correlations over the selected slice; .corr() already
# returns a DataFrame, so no extra pd.DataFrame wrapper is needed.
corMat = dataFile.iloc[1:20, 1:20].corr()

# Render the correlation matrix as a pseudocolor grid.
plot.pcolor(corMat)
plot.show()
``````

THE END