| @@ -0,0 +1,84 @@ | |||
| import pandas as pd | |||
| import numpy as np | |||
| import matplotlib | |||
| import matplotlib.pyplot as plt | |||
def get_data(file):
    """Load a header-less CSV file and return its contents as a NumPy array.

    Args:
        file: Path (or any other source accepted by ``pandas.read_csv``)
            of the CSV file.  The first line is treated as data, not as
            column names.

    Returns:
        numpy.ndarray: One row per CSV line, one column per CSV field.
    """
    # header=None: the file has no title row, so pandas must not consume
    # the first record as column names.
    frame = pd.read_csv(file, header=None)
    return frame.to_numpy()
def get_Polar(data):
    """Convert a Cartesian point to polar coordinates.

    Args:
        data: Indexable pair whose element 0 is the x coordinate and whose
            element 1 is the y coordinate.

    Returns:
        list: ``[dist, angle]`` where ``dist`` is the distance from the
        origin and ``angle`` is the angle in radians in ``[-pi, pi]``.
    """
    x, y = data[0], data[1]
    dist = np.sqrt(x ** 2 + y ** 2)
    # arctan2 looks at the signs of both coordinates, so points with x < 0
    # get their true angle, and x == 0 no longer triggers a division by zero
    # the way arctan(y / x) did.
    angle = np.arctan2(y, x)
    return [dist, angle]
def get_distance(data, origin):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components of each argument are used.

    Args:
        data: Indexable pair (list, tuple or array) holding the point.
        origin: Indexable pair holding the reference point.

    Returns:
        The distance between ``data`` and ``origin``; ``0`` when both
        points coincide.
    """
    # No special case for identical points: the formula already yields 0,
    # and an element-wise ``==`` test would fail for plain lists/tuples.
    delta_0 = data[0] - origin[0]
    delta_1 = data[1] - origin[1]
    return np.sqrt(delta_0 ** 2 + delta_1 ** 2)
def kmeans(file="dataset_circles.csv", max_iter=1000):
    """Split the points of a CSV file into two clusters and plot the result.

    The rows are loaded with ``get_data`` and converted one by one with
    ``get_Polar``.  Two-cluster k-means is then run on the converted
    coordinates, starting from two randomly chosen rows.  Finally the
    original (unconverted) coordinates are drawn with matplotlib: the
    first cluster in blue, the second in red.

    Args:
        file: CSV file to cluster; defaults to ``"dataset_circles.csv"``.
        max_iter: Upper bound on the number of assign/update passes, so the
            loop always terminates.

    Raises:
        ValueError: If the file contains no rows.
    """
    data = get_data(file)
    n_points = data.shape[0]
    if n_points == 0:
        raise ValueError("kmeans needs at least one data point")

    polar_data = np.array([get_Polar(point) for point in data.tolist()])
    coord_0 = polar_data[:, 0]
    coord_1 = polar_data[:, 1]

    # Seed the centres with two *different* rows whenever there are at
    # least two: picking the same row twice leaves the second cluster empty.
    seeds = np.random.choice(n_points, size=2, replace=n_points < 2)
    center1 = polar_data[seeds[0]]
    center2 = polar_data[seeds[1]]

    in_second = np.zeros(n_points, dtype=bool)
    for _ in range(max_iter):
        # Distance of every point to each centre.
        dist1 = np.sqrt((coord_0 - center1[0]) ** 2 + (coord_1 - center1[1]) ** 2)
        dist2 = np.sqrt((coord_0 - center2[0]) ** 2 + (coord_1 - center2[1]) ** 2)

        # Each point joins the cluster whose centre is closer; ties stay
        # in the first cluster.
        in_second = dist1 > dist2
        in_first = ~in_second

        # Recompute the centres as the mean of their members.  An empty
        # cluster keeps its previous centre: averaging zero rows would give
        # NaN, and NaN never compares equal, so the loop could not converge.
        if in_first.any():
            new_center1 = np.mean(polar_data[in_first, :2], axis=0)
        else:
            new_center1 = center1
        if in_second.any():
            new_center2 = np.mean(polar_data[in_second, :2], axis=0)
        else:
            new_center2 = center2

        # Stop once a pass leaves both centres unchanged.
        converged = (new_center1 == center1).all() and (new_center2 == center2).all()
        center1, center2 = new_center1, new_center2
        if converged:
            break

    first = data[~in_second]
    second = data[in_second]
    plt.scatter(first[:, 0], first[:, 1], s=None, c="b")
    plt.scatter(second[:, 0], second[:, 1], s=None, c="r")
    plt.show()
def main():
    """Script entry point: run the k-means clustering demo."""
    kmeans()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()