# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Gradients for operators defined in array_ops.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import constant_op
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import gen_array_ops
@ops.RegisterGradient("Pack")
def _PackGrad(op, grad):
"""Gradient for pack op."""
  return array_ops.unpack(grad, num=op.get_attr("N"))
@ops.RegisterGradient("Unpack")
def _UnpackGrad(_, *grads):
"""Gradient for unpack op."""
return array_ops.pack(grads)
@ops.RegisterGradient("Concat")
def _ConcatGrad(op, grad):
"""Gradient for concat op."""
assert isinstance(grad, ops.Tensor)
# Degenerate concatenation, just return grad.
if len(op.inputs) == 2:
return [None, grad]
# Get the inputs' tensor shapes
sizes = [array_ops.shape(x) for x in op.inputs[1:]]
concat_dim = op.inputs[0]
# Since shape is 1-D, shape_of_shape = [rank-of-inputs]
shape_of_shape = array_ops.shape(sizes[0])
# Make a vector of length equal to the input's dimensions,
# with 0's everywhere and 1 in the concat dim position.
# Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
mask = array_ops.concat(0,
[array_ops.fill(
array_ops.expand_dims(concat_dim, 0), 0), [1],
array_ops.fill(shape_of_shape - concat_dim - 1, 0)])
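  # E.g. for two rank-2 inputs concatenated along concat_dim=1, mask == [0, 1],
  # so in the loop below `begin` advances by each input's size along the
  # concat dimension only: begin=[0, 0], then [0, sizes[0][1]], and so on.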
  out_grads = []
  begin = array_ops.fill(shape_of_shape, 0)
  for i in range(len(sizes)):
    out_grads.append(array_ops.slice(grad, begin, sizes[i]))
    # Lint complains about `begin = begin + ...`, so spell out the add.
    begin = math_ops.add(begin, sizes[i] * mask)
  return [None] + out_grads
@ops.RegisterGradient("Slice")
def _SliceGrad(op, grad):
"""Gradient for Slice op."""
# Create an Nx2 padding where the first column represents how many
# zeros are to be prepended for each dimension, and the second
# column indicates how many zeros are appended.
#
# The number of zeros to append is the shape of the input
# elementwise-subtracted by both the begin vector and sizes vector.
#
# Some more reshaping is needed to assemble this tensor with the
# right dimensions.
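  # E.g. slicing a [4, 5] input at begin=[1, 2] with size [2, 2] gives
  # paddings [[1, 1], [2, 1]], so the [2, 2] grad is padded back to [4, 5]
  # with zeros everywhere outside the sliced region.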
  input_vec = op.inputs[0]
  begin_vec = op.inputs[1]
  input_rank = array_ops.rank(input_vec)
  slice_size = array_ops.shape(op.outputs[0])
  shape = array_ops.pack([input_rank, 1])
  before_pad = array_ops.reshape(begin_vec, shape)
  after_pad = array_ops.reshape(
      array_ops.shape(input_vec) - slice_size - begin_vec, shape)
  paddings = array_ops.concat(1, [before_pad, after_pad])
  return array_ops.pad(grad, paddings), None, None
@ops.RegisterGradient("Split")
def _SplitGrad(op, *grads):
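  # op.inputs[0] is split_dim: e.g. splitting a [4, 6] tensor into three
  # pieces along dim 1 yields [4, 2] outputs, and their grads are simply
  # concatenated back into a [4, 6] tensor. split_dim gets no gradient.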
  return None, array_ops.concat(op.inputs[0], list(grads))
ops.NoGradient("Const")
# TODO(liqzhang): The gradient for Diag operator would be
# the diagonal of the backprop. Implement if there is a need.
ops.NoGradient("Diag")
# Edit Distance has no gradient (but can be used to eval seq2seq or CTC).
ops.NoGradient("EditDistance")
ops.NoGradient("Fill")
@ops.RegisterGradient("Gather")
def _GatherGrad(op, grad):
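  # Only the gathered rows of params (op.inputs[0]) receive gradient, so
  # return an IndexedSlices carrying grad at indices op.inputs[1] with the
  # dense shape of params, rather than materializing a dense gradient.
  # The indices input gets no gradient.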
  return [
      ops.IndexedSlices(grad, op.inputs[1], array_ops.shape(op.inputs[0])),
      None
  ]
@ops.RegisterGradient("Identity")
def _IdGrad(_, grad):
return grad
@ops.RegisterGradient("RefIdentity")
def _RefIdGrad(_, grad):
return grad
ops.NoGradient("StopGradient")
@ops.RegisterGradient("Reshape")
def _ReshapeGrad(op, grad):
return [array_ops.reshape(grad, array_ops.shape(op.inputs[0])), None]
ops.NoGradient("InvertPermutation")


def _ReshapeToInput(op, grad):
  """Reshapes the gradient to the shape of the original input."""
  return array_ops.reshape(grad, array_ops.shape(op.inputs[0]))
@ops.RegisterGradient("ExpandDims")
def _ExpandDimsGrad(op, grad):
return [_ReshapeToInput(op, grad), None]
@ops.RegisterGradient("Squeeze")
def _SqueezeGrad(op, grad):
return _ReshapeToInput(op, grad)
@ops.RegisterGradient("Transpose")
def _TransposeGrad(op, grad):
"""Returns unshuffle(grad)."""
  p = op.inputs[1]
  return [array_ops.transpose(grad, array_ops.invert_permutation(p)), None]
ops.NoGradient("Shape")
ops.NoGradient("Rank")
ops.NoGradient("Size")
@ops.RegisterGradient("Tile")
def _TileGrad(op, grad):
"""Sum reduces grad along the tiled dimensions."""
  assert isinstance(grad, ops.Tensor)
  return [gen_array_ops._tile_grad(grad, op.inputs[1]), None]
ops.NoGradient("TileGrad")
ops.NoGradient("BroadcastGradientArgs")
@ops.RegisterGradient("Pad")
def _PadGrad(op, grad):
"""Gradient for Pad."""
# Pad introduces values around the original tensor, so the gradient function
# slices the original shape out of the gradient."""
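  # E.g. padding a [2, 3] input with paddings [[1, 1], [2, 0]] gives a [4, 5]
  # output, so the gradient slice starts at begin=[1, 2] and takes a piece
  # of size [2, 3].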
  x = op.inputs[0]
  a = op.inputs[1]  # [Rank(x), 2]
  # Take a slice of a (the first column): shape [Rank(x), 1].
  pad_before = array_ops.slice(a, [0, 0],
                               array_ops.pack([array_ops.rank(x), 1]))
  # Make it a 1-D tensor.
  begin = array_ops.reshape(pad_before, [-1])
  sizes = array_ops.shape(x)
  return array_ops.slice(grad, begin, sizes), None


# ReverseSequence is just a permutation. The gradient permutes back.
@ops.RegisterGradient("ReverseSequence")
def _ReverseSequenceGrad(op, grad):
  """Gradient for ReverseSequence op."""
  seq_lengths = op.inputs[1]
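  # Reversing the first seq_lengths[i] entries along seq_dim twice is the
  # identity, so applying the same reversal to grad routes each gradient back
  # to its original position. seq_lengths itself gets no gradient.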
  return [array_ops.reverse_sequence(grad,
                                     seq_dim=op.get_attr("seq_dim"),
                                     seq_lengths=seq_lengths),
          None]