| @@ -0,0 +1,81 @@ | |||
| import matplotlib.pyplot as plt | |||
| from numpy.ma.core import set_fill_value | |||
| from sklearn.datasets import load_digits | |||
| import numpy as np | |||
class LogReg:
    """One-vs-rest, L2-regularised logistic regression on the digits data set.

    The constructor loads the scikit-learn digits data and keeps the first
    90% of the samples for training and the remainder for testing.  One
    binary classifier (one row of ``theta``) is trained per class with
    batch gradient descent; prediction picks the class whose classifier
    gives the highest hypothesis value.

    Typical use: ``preprocessing()`` -> ``regression()`` -> ``predict()``.
    """

    # Number of classes: length of a one-hot label and number of rows in
    # ``theta``.
    NUM_CLASSES = 10

    def __init__(self, alpha, lam, iter):
        """Load the data, split it 90/10 and zero-initialise the weights.

        Args:
            alpha: Gradient-descent step size.
            lam: L2 regularisation strength.
            iter: Number of gradient-descent steps run for each class.
                (Shadows the builtin; the name is kept so existing callers
                keep working.)
        """
        digits = load_digits()
        self.image = digits.images
        self.train_size = int(digits.data.shape[0] * 0.9)
        self.train = digits.data[:self.train_size].astype(np.float64)
        self.test = digits.data[self.train_size:]
        self.target = digits.target[:self.train_size]
        self.test_target = digits.target[self.train_size:]
        # One weight row per class, one column per input feature
        # (the original hard-coded 64, noted there as 8*8).
        self.theta = np.zeros((self.NUM_CLASSES, digits.data.shape[1]))
        self.alpha = alpha
        self.lam = lam
        self.iter = iter

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

        Only ``exp(-|x|)`` is ever evaluated, so inputs of large magnitude
        cannot overflow; the two branches are algebraically identical.
        """
        x = np.asarray(x, dtype=np.float64)
        decay = np.exp(-np.abs(x))
        return np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def OneHot(self, data):
        """Return a float vector of length ``NUM_CLASSES`` with a 1 at ``data``."""
        encoded = np.zeros((self.NUM_CLASSES,))
        encoded[data] = 1
        return encoded

    @staticmethod
    def _min_max_rows(samples):
        """Scale every row of a 2-D array to [0, 1] using its own min and max."""
        samples = np.asarray(samples, dtype=np.float64)
        if samples.size == 0:
            return samples
        low = samples.min(axis=1, keepdims=True)
        span = samples.max(axis=1, keepdims=True) - low
        # A constant row has zero span; dividing by 1 instead maps it to all
        # zeros rather than NaN.
        span = np.where(span == 0, 1.0, span)
        return (samples - low) / span

    def preprocessing(self):
        """Normalise train/test rows to [0, 1] and one-hot encode the labels.

        Safe to call more than once: labels that are already a 2-D one-hot
        matrix are left untouched.
        """
        self.train = self._min_max_rows(self.train)
        self.test = self._min_max_rows(self.test)

        labels = np.asarray(self.target)
        if labels.ndim == 1:
            count = labels.shape[0]
            encoded = np.zeros((count, self.NUM_CLASSES), dtype=np.float64)
            encoded[np.arange(count), labels.astype(np.intp)] = 1.0
            self.target = encoded

    def Costfunction(self, i):
        """Return the regularised cost and gradient of the ``i``-th classifier.

        Args:
            i: Index of the class (row of ``theta``, column of the one-hot
                ``target``) being evaluated.

        Returns:
            ``(cost, grad)`` where ``cost`` is a ``(1, 1)`` array and
            ``grad`` is a column vector with one entry per feature.
        """
        weights = self.theta[i, :].reshape(self.theta.shape[1], 1)
        label = self.target[:, i]
        data = self.train
        count = self.train_size

        h = self.sigmoid(data.dot(weights))

        # Cost: keep the hypothesis strictly inside (0, 1) so a saturated
        # prediction cannot produce log(0) = -inf (or 0 * -inf = NaN).
        tiny = np.finfo(np.float64).eps
        h_safe = np.clip(h, tiny, 1.0 - tiny)
        data_term = (label.T.dot(np.log(h_safe))
                     + (1 - label).T.dot(np.log(1 - h_safe)))
        penalty = (self.lam / (2 * count)) * weights.T.dot(weights)
        cost = (-1 / count) * data_term + penalty

        # Gradient (uses the unclipped hypothesis).
        grad = (1 / count) * data.T.dot(h - label.reshape(-1, 1))
        grad = grad + (self.lam / count) * weights
        return cost, grad

    def regression(self):
        """Train every classifier by gradient descent and return ``theta``.

        Runs ``self.iter`` descent steps for each row of ``theta`` and prints
        the last cost computed for that class.
        """
        for k in range(self.theta.shape[0]):
            J = None
            for _ in range(self.iter):
                J, grad = self.Costfunction(k)
                self.theta[k, :] -= self.alpha * grad.ravel()
            # With iter == 0 no cost was ever computed, so print nothing.
            if J is not None:
                print(J)
        return self.theta

    def predict(self):
        """Classify the test set, print the accuracy and return it.

        Every classifier's hypothesis is evaluated for every test sample and
        the index of the largest value in each row is the predicted class.

        Returns:
            Accuracy on the test set as a percentage (float).
        """
        pred = self.sigmoid(self.test.dot(self.theta.T)).argmax(1)
        expected = np.asarray(self.test_target)
        num = int(np.count_nonzero(pred == expected))
        accuracy = float(num) / float(len(expected)) * 100.0
        print(accuracy, "%")
        return accuracy
def main():
    """Build the classifier, preprocess the data, train it and evaluate it."""
    model = LogReg(alpha=1, lam=0.1, iter=3000)
    model.preprocessing()
    model.regression()
    model.predict()
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()