from keras import backend as K
import numpy as np

# Build a synthetic 3-class dataset: each 2-D point in [0, 1)^2 is labelled
# by which region of the unit square it falls in.
dataset_size = 200000
X = np.random.rand(dataset_size, 2)
labels = np.zeros((dataset_size, 3))
labels[X[:, 0] > X[:, 1]] = [0, 0, 1]
labels[X[:, 0] <= X[:, 1]] = [1, 0, 0]
labels[X[:, 1] + X[:, 0] > 1] = [0, 1, 0]

# Symbolic inputs: a batch of 2-D points and their one-hot targets.
x = K.placeholder(shape=(None, 2))
t = K.placeholder(shape=(None, 3))

# Parameters of a 2-12-3 network, initialised from N(0, 0.01).
theta1 = K.random_normal_variable(shape=(2, 12), mean=0, scale=0.01)
bias1 = K.random_normal_variable(shape=(1, 12), mean=0, scale=0.01)
theta2 = K.random_normal_variable(shape=(12, 3), mean=0, scale=0.01)
bias2 = K.random_normal_variable(shape=(1, 3), mean=0, scale=0.01)

# Equivalent initialisation via NumPy:
# theta1 = K.variable(value=np.random.normal(scale=0.01, size=(2, 12)))
# bias1 = K.variable(value=np.random.normal(scale=0.01, size=(1, 12)))
# theta2 = K.variable(value=np.random.normal(scale=0.01, size=(12, 3)))
# bias2 = K.variable(value=np.random.normal(scale=0.01, size=(1, 3)))

def forward(x):
    # One hidden layer with a ReLU, then a linear output layer (logits).
    y = K.dot(x, theta1) + bias1
    y = K.maximum(y, 0.)
    return K.dot(y, theta2) + bias2

def softmax(x):
    e = K.exp(x)
    s = K.sum(e, axis=1, keepdims=True)
    return e / s

def crossentropy(y, t):
    # t is one-hot, so the sum picks out the predicted probability
    # of the true class.
    prob = K.sum(y * t, axis=1)
    return -K.mean(K.log(prob))

loss = crossentropy(softmax(forward(x)), t)
params = [theta1, bias1, theta2, bias2]
grads = K.gradients(loss, params)

# One compiled function returns the loss and all parameter gradients.
f = K.function([x, t], [loss] + grads)

# Plain SGD over the first 100,000 samples, one pass, with a step-wise
# learning-rate decay (x0.1 every 1000 batches after a warm-up of 100).
batch_size = 20
for i in range(min(dataset_size, 100000) // batch_size):
    lr = 0.5 * (.1 ** (max(i - 100, 0) // 1000))
    sample = X[batch_size * i:batch_size * (i + 1)]
    target = labels[batch_size * i:batch_size * (i + 1)]
    outputs = f([sample, target])
    for param, g in zip(params, outputs[1:]):
        K.set_value(param, K.eval(param) - g * lr)
    print("cost {} - learning rate {}".format(outputs[0], lr))

# Accuracy over the first 20,000 samples. Note that these points were also
# seen during training; a check on fresh data is sketched after the script.
f = K.function([x], [K.argmax(forward(x), axis=1)])
accuracy = 0
for i in range(1000):
    sample = X[batch_size * i:batch_size * (i + 1)]
    target = labels[batch_size * i:batch_size * (i + 1)]
    tt = f([sample])[0]
    accuracy += np.sum(tt == np.argmax(target, axis=1))
print("Accuracy", accuracy / 1000. / batch_size)
# observed accuracy: 99.44%
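
# --- Sketch (not in the original listing): numerically stable softmax ---
# The naive softmax above can overflow in K.exp when logits grow large.
# A standard remedy is to subtract the row-wise max before exponentiating;
# the result is mathematically unchanged. Shown here as an optional variant.
def stable_softmax(x):
    z = x - K.max(x, axis=1, keepdims=True)
    e = K.exp(z)
    return e / K.sum(e, axis=1, keepdims=True)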
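
# --- Sketch (not in the original listing): evaluation on held-out data ---
# The accuracy loop above reuses samples the network trained on. A minimal
# check on freshly drawn points, assuming the same labelling rule as the
# training data, gives a more honest estimate of generalisation.
X_test = np.random.rand(2000, 2)
y_test = np.zeros((2000, 3))
y_test[X_test[:, 0] > X_test[:, 1]] = [0, 0, 1]
y_test[X_test[:, 0] <= X_test[:, 1]] = [1, 0, 0]
y_test[X_test[:, 1] + X_test[:, 0] > 1] = [0, 1, 0]
pred = f([X_test])[0]  # f is the argmax predictor defined above
print("held-out accuracy", np.mean(pred == np.argmax(y_test, axis=1)))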