import mxnet as mx
import numpy as np

# Synthetic dataset: 2-D points in the unit square, assigned to one of three
# classes. The assignments overlap and the last write wins, so any point with
# x0 + x1 > 1 ends up in class 1 regardless of the first two rules.
dataset_size = 200000
X = np.random.rand(dataset_size, 2)
labels = np.zeros((dataset_size, 3))
labels[X[:, 0] > X[:, 1]] = [0, 0, 1]
labels[X[:, 0] <= X[:, 1]] = [1, 0, 0]
labels[X[:, 0] + X[:, 1] > 1] = [0, 1, 0]

# Parameters of a 2-12-3 fully connected network, initialized with small
# Gaussian noise.
theta1 = mx.nd.random.normal(shape=(2, 12), scale=0.01)
bias1 = mx.nd.random.normal(shape=(1, 12), scale=0.01)
theta2 = mx.nd.random.normal(shape=(12, 3), scale=0.01)
bias2 = mx.nd.random.normal(shape=(1, 3), scale=0.01)
params = [theta1, bias1, theta2, bias2]

def forward(x):
    # Hidden layer with ReLU activation, then a linear output layer.
    y = mx.nd.linalg.gemm2(x, theta1) + bias1
    y = mx.nd.maximum(y, 0)
    return mx.nd.linalg.gemm2(y, theta2) + bias2

def softmax(x):
    e = mx.nd.exp(x)
    s = mx.nd.sum(e, axis=1, keepdims=True)
    return e / s

def crossentropy(y, t):
    # t is one-hot, so the sum picks out the predicted probability of the
    # true class for each sample.
    y = mx.nd.sum(y * t, axis=1)
    return -mx.nd.mean(mx.nd.log(y))

# Allocate gradient buffers once; backward() overwrites them on every pass.
for p in params:
    p.attach_grad()

batch_size = 20
for i in range(min(dataset_size, 100000) // batch_size):
    # Step decay: lr = 0.5 for the first ~1100 batches, then divided by 10
    # every 1000 batches.
    lr = 0.5 * (0.1 ** (max(i - 100, 0) // 1000))
    x = mx.nd.array(X[batch_size * i:batch_size * (i + 1)])
    t = mx.nd.array(labels[batch_size * i:batch_size * (i + 1)])
    with mx.autograd.record():
        y = forward(x)
        z = crossentropy(softmax(y), t)
    if i % 500 == 0:  # log the loss periodically instead of every batch
        print(z.asnumpy()[0])
    z.backward()
    # Plain SGD update, performed outside the recorded scope.
    for p in params:
        p[:] = p - lr * p.grad

# Evaluate on the first 1000 samples (note: these were also seen during
# training, so this measures training accuracy rather than generalization).
accuracy = 0
for i in range(1000):
    x = mx.nd.array(X[i:i + 1])
    t = mx.nd.array(labels[i:i + 1])
    y = forward(x)
    eq = (t.argmax(axis=1) == y.argmax(axis=1))
    accuracy += eq.asnumpy()[0]
print("Accuracy", accuracy / 1000.)  # typically prints an accuracy around 0.99
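
# Side note: the raw exp() in softmax above can overflow once the logits grow
# large. A minimal sketch of a max-shifted variant follows; the name
# softmax_stable is ours, and it is a drop-in replacement, mathematically
# identical because softmax is invariant to shifting each row by a constant.
def softmax_stable(x):
    # Subtract the per-row maximum so the largest exponent is exp(0) = 1.
    z = x - mx.nd.max(x, axis=1, keepdims=True)
    e = mx.nd.exp(z)
    return e / mx.nd.sum(e, axis=1, keepdims=True)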