# 轮廓指数

1. 计算样本 i 到同簇内其他样本（不含自身）的平均距离 a(i)
2. 根据样本 i 到各簇中心点的距离，找出中心点离样本 i 最近的其他簇，计算样本 i 到该簇内所有样本的平均距离 b(i)
3. 计算样本轮廓系数 s(i) = (b(i)-a(i))/max(b(i), a(i))
4. 计算总轮廓系数 s = 1/n*sum(s(i))
def silhouette_score_by_maxtrix(dis_matrix, label):
    """Return the mean silhouette coefficient computed from a distance matrix.

    For every sample ``i``:

    * ``a(i)`` is the mean distance from ``i`` to the *other* members of its
      own cluster; the sample's own diagonal entry is excluded.
    * The nearest other cluster is the one whose medoid is closest to ``i``.
      A cluster's medoid is the member with the smallest column mean in the
      cluster's own sub-matrix. ``b(i)`` is the mean distance from ``i`` to
      all members of that nearest cluster.
    * ``s(i) = (b(i) - a(i)) / max(a(i), b(i))``.

    A sample that is alone in its cluster contributes ``s(i) = 0``, and so
    does a sample whose ``a(i)`` and ``b(i)`` are both zero.

    Note:
        With exactly two clusters there is only one candidate for the
        nearest cluster. With more clusters, picking the nearest cluster by
        medoid distance may differ from picking the cluster with the
        smallest mean distance.

    Args:
        dis_matrix: Square ``(n, n)`` array-like of pairwise distances.
        label: One-dimensional array-like holding ``n`` cluster labels.

    Returns:
        The average of ``s(i)`` over all ``n`` samples.

    Raises:
        ValueError: If ``label`` is not one-dimensional, if ``dis_matrix``
            is not ``(n, n)``, or if there are fewer than two distinct
            labels (the score is undefined in that case).
    """
    dis_matrix = np.asarray(dis_matrix)
    label = np.asarray(label)
    n_samples = len(label) if label.ndim == 1 else -1
    if dis_matrix.shape != (n_samples, n_samples):
        raise ValueError(
            "dis_matrix must be a square (n, n) matrix and label a 1-D "
            "sequence of length n"
        )

    # np.unique returns sorted labels, so tie-breaking between equally near
    # clusters is deterministic (first label in sorted order wins).
    clusters, cluster_of = np.unique(label, return_inverse=True)
    n_clusters = len(clusters)
    if n_clusters < 2:
        raise ValueError("silhouette score needs at least two clusters")

    # Member indices and medoid of each cluster, computed once up front
    # instead of once per sample.
    members = [np.flatnonzero(cluster_of == k) for k in range(n_clusters)]
    medoids = [
        idx[np.argmin(dis_matrix[idx][:, idx].mean(axis=0))]
        for idx in members
    ]
    # medoid_to_sample[k, i] is the distance from cluster k's medoid to i.
    medoid_to_sample = dis_matrix[medoids, :]

    total = 0.0
    for i, row in enumerate(dis_matrix):
        own = cluster_of[i]
        own_members = members[own]
        if len(own_members) == 1:
            # Singleton cluster: s(i) is defined as 0.
            continue
        # Exclude the sample itself from its own-cluster average.
        a_i = (row[own_members].sum() - row[i]) / (len(own_members) - 1)

        nearest = None
        nearest_dist = np.inf
        for k in range(n_clusters):
            if k == own:
                continue
            dist = medoid_to_sample[k, i]
            if nearest is None or dist < nearest_dist:
                nearest, nearest_dist = k, dist
        b_i = row[members[nearest]].mean()

        denom = max(a_i, b_i)
        if denom > 0:  # a(i) == b(i) == 0 would otherwise give 0/0
            total += (b_i - a_i) / denom
    return total / n_samples
``````

# DB指数

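1. 求每个簇内样本到该簇中心点的平均距离，依次存入 s_list
2. 求各簇中心点两两之间的距离 d(i, j)
3. 对每个簇 i，计算 R(i) = max_{j≠i} (s_list[i] + s_list[j]) / d(i, j)
4. DB 指数 = 1/K * sum(R(i))，其中 K 为簇的个数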
def davies_bouldin_score_by_maxtrix(dis_matrix, label):
    """Return a medoid-based Davies-Bouldin index from a distance matrix.

    Each cluster is represented by its medoid: the member with the smallest
    column mean in the cluster's own sub-matrix. The score is built from:

    * the scatter ``s_k``: the mean distance from the medoid of cluster
      ``k`` to every member of that cluster (the medoid itself included);
    * the separation ``d(i, j)``: the distance between the medoids of
      clusters ``i`` and ``j``;
    * ``R(i) = max over j != i of (s_i + s_j) / d(i, j)``.

    The result is the mean of ``R(i)`` over all clusters. With a single
    cluster there is no other cluster to compare against and the result
    is ``0.0``. If two medoids are at distance zero the corresponding
    ratio is not finite.

    Args:
        dis_matrix: Square ``(n, n)`` array-like of pairwise distances.
        label: One-dimensional array-like holding ``n`` cluster labels.

    Returns:
        The mean over clusters of the largest ``(s_i + s_j) / d(i, j)``.

    Raises:
        ValueError: If ``label`` is empty or not one-dimensional, or if
            ``dis_matrix`` is not ``(n, n)``.
    """
    dis_matrix = np.asarray(dis_matrix)
    label = np.asarray(label)
    n_samples = len(label) if label.ndim == 1 else -1
    if dis_matrix.shape != (n_samples, n_samples):
        raise ValueError(
            "dis_matrix must be a square (n, n) matrix and label a 1-D "
            "sequence of length n"
        )
    clusters = np.unique(label)
    n_clusters = len(clusters)
    if n_clusters == 0:
        raise ValueError("label must contain at least one sample")

    # Within-cluster part: medoid and scatter of every cluster, computed
    # once from the dis_matrix argument itself.
    medoids = []
    scatter = []
    for cluster in clusters:
        idx = np.flatnonzero(label == cluster)
        medoid = idx[np.argmin(dis_matrix[idx][:, idx].mean(axis=0))]
        medoids.append(medoid)
        scatter.append(dis_matrix[medoid, idx].mean())

    # Between-cluster part: worst (largest) similarity ratio per cluster.
    ratio_sum = 0
    for i in range(n_clusters):
        worst = 0
        for j in range(n_clusters):
            if j == i:
                continue
            separation = dis_matrix[medoids[i], medoids[j]]
            ratio = (scatter[i] + scatter[j]) / separation
            if ratio > worst:
                worst = ratio
        ratio_sum += worst
    return ratio_sum / n_clusters
# Evaluate a clustering with the DB index: the labels are read from the
# 'label' column of train_label_data and scored against dis_matrix.
# NOTE(review): train_label_data and dis_matrix are not defined in the
# visible part of this file -- presumably created earlier; confirm.
label = train_label_data['label'].values
# Bare expression: the returned score is not stored or printed here.
davies_bouldin_score_by_maxtrix(dis_matrix, label)
``````

``````import numpy as np
from sklearn.metrics.pairwise import manhattan_distances
# Toy example: six 1-D points (1..6) split into two clusters of three,
# scored with both metrics on their pairwise Manhattan distances.
distance_maxtrix = manhattan_distances(np.arange(1, 7).reshape(-1, 1))
label = np.array([0] * 3 + [1] * 3)
print(silhouette_score_by_maxtrix(distance_maxtrix, label))
print(davies_bouldin_score_by_maxtrix(distance_maxtrix, label))
``````

THE END