import numpy as np


class Tensor(object):

    def __init__(self, data,
                 autograd=False,
                 creators=None,
                 creation_op=None,
                 id=None):
        self.data = np.array(data)
        self.autograd = autograd
        self.grad = None

        if (id is None):
            self.id = np.random.randint(0, 100000)
        else:
            self.id = id

        self.creators = creators
        self.creation_op = creation_op
        self.children = {}

        if (creators is not None):
            for c in creators:
                if (self.id not in c.children):
                    c.children[self.id] = 1
                else:
                    c.children[self.id] += 1

    def all_children_grads_accounted_for(self):
        for id, cnt in self.children.items():
            if (cnt != 0):
                return False
        return True

    def backward(self, grad=None, grad_origin=None):
        if (self.autograd):

            if (grad is None):
                grad = Tensor(np.ones_like(self.data))

            if (grad_origin is not None):
                if (self.children[grad_origin.id] == 0):
                    raise Exception("cannot backprop more than once")
                else:
                    self.children[grad_origin.id] -= 1

            if (self.grad is None):
                self.grad = grad
            else:
                self.grad += grad

            # grads must not have grads of their own
            assert grad.autograd == False

            # only continue backpropping if there's something to
            # backprop into and if all gradients (from children)
            # are accounted for; override waiting for children if
            # "backprop" was called on this variable directly
            if (self.creators is not None and
                    (self.all_children_grads_accounted_for() or
                     grad_origin is None)):

                if (self.creation_op == "add"):
                    self.creators[0].backward(self.grad, self)
                    self.creators[1].backward(self.grad, self)

                if (self.creation_op == "sub"):
                    self.creators[0].backward(Tensor(self.grad.data), self)
                    self.creators[1].backward(Tensor(self.grad.__neg__().data), self)

                if (self.creation_op == "mul"):
                    new = self.grad * self.creators[1]
                    self.creators[0].backward(new, self)
                    new = self.grad * self.creators[0]
                    self.creators[1].backward(new, self)

                if (self.creation_op == "mm"):
                    c0 = self.creators[0]
                    c1 = self.creators[1]
                    new = self.grad.mm(c1.transpose())
                    c0.backward(new)
                    new = self.grad.transpose().mm(c0).transpose()
                    c1.backward(new)

                if (self.creation_op == "transpose"):
                    self.creators[0].backward(self.grad.transpose())

                if ("sum" in self.creation_op):
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.expand(dim, self.creators[0].data.shape[dim]))

                if ("expand" in self.creation_op):
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.sum(dim))

                if (self.creation_op == "neg"):
                    self.creators[0].backward(self.grad.__neg__())

                if (self.creation_op == "sigmoid"):
                    ones = Tensor(np.ones_like(self.grad.data))
                    self.creators[0].backward(self.grad * (self * (ones - self)))

                if (self.creation_op == "tanh"):
                    ones = Tensor(np.ones_like(self.grad.data))
                    self.creators[0].backward(self.grad * (ones - (self * self)))

                if (self.creation_op == "index_select"):
                    new_grad = np.zeros_like(self.creators[0].data)
                    indices_ = self.index_select_indices.data.flatten()
                    grad_ = grad.data.reshape(len(indices_), -1)
                    for i in range(len(indices_)):
                        new_grad[indices_[i]] += grad_[i]
                    self.creators[0].backward(Tensor(new_grad))

    def __add__(self, other):
        if (self.autograd and other.autograd):
            return Tensor(self.data + other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="add")
        return Tensor(self.data + other.data)

    def __neg__(self):
        if (self.autograd):
            return Tensor(self.data * -1,
                          autograd=True,
                          creators=[self],
                          creation_op="neg")
        return Tensor(self.data * -1)

    def __sub__(self, other):
        if (self.autograd and other.autograd):
            return Tensor(self.data - other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="sub")
        return Tensor(self.data - other.data)

    def __mul__(self, other):
        if (self.autograd and other.autograd):
            return Tensor(self.data * other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="mul")
        return Tensor(self.data * other.data)

    def sum(self, dim):
        if (self.autograd):
            return Tensor(self.data.sum(dim),
                          autograd=True,
                          creators=[self],
                          creation_op="sum_" + str(dim))
        return Tensor(self.data.sum(dim))

    def expand(self, dim, copies):
        trans_cmd = list(range(0, len(self.data.shape)))
        trans_cmd.insert(dim, len(self.data.shape))
        new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)

        if (self.autograd):
            return Tensor(new_data,
                          autograd=True,
                          creators=[self],
                          creation_op="expand_" + str(dim))
        return Tensor(new_data)

    def transpose(self):
        if (self.autograd):
            return Tensor(self.data.transpose(),
                          autograd=True,
                          creators=[self],
                          creation_op="transpose")
        return Tensor(self.data.transpose())

    def mm(self, x):
        if (self.autograd):
            return Tensor(self.data.dot(x.data),
                          autograd=True,
                          creators=[self, x],
                          creation_op="mm")
        return Tensor(self.data.dot(x.data))

    def sigmoid(self):
        if (self.autograd):
            return Tensor(1 / (1 + np.exp(-self.data)),
                          autograd=True,
                          creators=[self],
                          creation_op="sigmoid")
        return Tensor(1 / (1 + np.exp(-self.data)))

    def tanh(self):
        if (self.autograd):
            return Tensor(np.tanh(self.data),
                          autograd=True,
                          creators=[self],
                          creation_op="tanh")
        return Tensor(np.tanh(self.data))

    def index_select(self, indices):
        if (self.autograd):
            new = Tensor(self.data[indices.data],
                         autograd=True,
                         creators=[self],
                         creation_op="index_select")
            new.index_select_indices = indices
            return new
        return Tensor(self.data[indices.data])

    def __repr__(self):
        return str(self.data.__repr__())

    def __str__(self):
        return str(self.data.__str__())


class Layer(object):

    def __init__(self):
        self.parameters = list()

    def get_parameters(self):
        return self.parameters


class Tanh(Layer):

    def __init__(self):
        super().__init__()

    def forward(self, input):
        return input.tanh()


class Embedding(Layer):

    def __init__(self, vocab_size, dim):
        super().__init__()
        self.vocab_size = vocab_size
        self.dim = dim
        # this random initialization style is just a convention from word2vec
        self.weight = (np.random.rand(vocab_size, dim) - 0.5) / dim


class Sigmoid(Layer):

    def __init__(self):
        super().__init__()

    def forward(self, input):
        return input.sigmoid()


x = Tensor(np.eye(5), autograd=True)
x.index_select(Tensor([[1, 2, 3], [2, 3, 4]])).backward()
print(x.grad)
'''
Expected output: indices 2 and 3 each appear twice across [[1, 2, 3], [2, 3, 4]],
so their rows accumulate a gradient of 2; indices 1 and 4 appear once and get 1:
[[0. 0. 0. 0. 0.]
 [1. 1. 1. 1. 1.]
 [2. 2. 2. 2. 2.]
 [2. 2. 2. 2. 2.]
 [1. 1. 1. 1. 1.]]
'''
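
# ---------------------------------------------------------------------------
# A minimal usage sketch (an addition, not part of the listing above): it
# assumes we wrap the Embedding weight in an autograd Tensor and give the
# layer a forward() based on index_select, so looking up word indices
# backprops into the embedding rows. Rows looked up more than once accumulate
# a proportionally larger gradient, matching the output shown above.

class EmbeddingWithForward(Embedding):  # hypothetical helper, for illustration only
    def __init__(self, vocab_size, dim):
        super().__init__(vocab_size, dim)
        # wrap the numpy weight so gradients can flow into it
        self.weight = Tensor(self.weight, autograd=True)
        self.parameters.append(self.weight)

    def forward(self, input):
        # one embedding row per index in `input`
        return self.weight.index_select(input)


embed = EmbeddingWithForward(vocab_size=5, dim=3)
words = Tensor([[1, 2, 3], [2, 3, 4]])  # two "sentences" of word indices
pred = embed.forward(words)             # shape (2, 3, 3): one row per index
pred.backward()                         # default upstream gradient of ones
print(embed.weight.grad)                # rows 2 and 3 accumulate 2.0, rows 1 and 4 get 1.0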