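"""One-vs-rest logistic regression on the sklearn handwritten digits.

Trains one L2-regularized binary classifier per digit (0-9) with batch
gradient descent and reports accuracy on a held-out 10% test split.
"""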
import numpy as np
from sklearn.datasets import load_digits

class LogReg:
    def __init__(self, alpha, lam, n_iter):
        digits = load_digits()
        self.image = digits.images
        # hold out the last 10% of samples for testing
        self.train_size = int(digits.data.shape[0] * 0.9)
        self.train = digits.data[:self.train_size].astype(np.float64)
        self.test = digits.data[self.train_size:].astype(np.float64)
        self.target = digits.target[:self.train_size]
        self.test_target = digits.target[self.train_size:]
        self.theta = np.zeros((10, 64))  # (classes, 8*8 pixels)
        self.alpha = alpha    # learning rate
        self.lam = lam        # L2 regularization strength
        self.n_iter = n_iter  # gradient-descent iterations per class
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))  # logistic (sigmoid) function
    def OneHot(self, data):
        # return a length-10 one-hot vector for a digit label
        tmp = np.zeros((10,))
        tmp[data] = 1
        return tmp
    def preprocessing(self):
        # min-max normalize every image to [0, 1]
        self.train = np.array([(i - np.min(i)) / (np.max(i) - np.min(i)) for i in self.train]).astype(np.float64)
        self.test = np.array([(i - np.min(i)) / (np.max(i) - np.min(i)) for i in self.test]).astype(np.float64)
        # one-hot encode the training labels
        self.target = np.array([self.OneHot(i) for i in self.target]).astype(np.float64)
    def Costfunction(self, i):
        # cost and gradient for the ith one-vs-rest classifier
        theta = self.theta[i, :].reshape(self.theta.shape[1], 1)
        label = self.target[:, i]
        data = self.train
        h = self.sigmoid(data.dot(theta))
        # regularized cross-entropy cost
        cost = (-1 / self.train_size) * (label.T.dot(np.log(h)) + (1 - label).T.dot(np.log(1 - h))) \
               + (self.lam / (2 * self.train_size)) * theta.T.dot(theta)
        # gradient of the cost, with the L2 penalty term added
        grad = (1 / self.train_size) * data.T.dot(h - label.reshape(-1, 1))
        grad = grad + (self.lam / self.train_size) * theta
        return cost, grad
    def regression(self):
        # train one binary classifier per digit class (one-vs-rest)
        for i in range(10):
            for j in range(self.n_iter):
                cost, grad = self.Costfunction(i)
                self.theta[i, :] = self.theta[i, :] - self.alpha * grad.T
            print(cost)  # final cost for class i
        return self.theta
    def predict(self):
        # score the test set against all 10 hypotheses and pick the most
        # confident one; argmax(1) gives the index of the max in each row
        pred = self.sigmoid(self.test.dot(self.theta.T)).argmax(1)
        num = 0
        for i in range(len(self.test_target)):
            if pred[i] == self.test_target[i]:
                num += 1
        print(float(num) / float(len(self.test_target)) * 100.0, "%")

def main():
    LR = LogReg(1, 0.1, 3000)  # alpha=1, lam=0.1, 3000 iterations per class
    LR.preprocessing()
    LR.regression()
    LR.predict()


if __name__ == "__main__":
    main()