# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Deep Neural Network estimators."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import six
import tensorflow as tf

from tensorflow.python.feature_column import feature_column
from tensorflow.python.feature_column import feature_column_lib
from tensorflow.python.framework import ops
from tensorflow.python.keras.engine import training
from tensorflow.python.keras.layers import core as keras_core
from tensorflow.python.keras.layers import normalization as keras_norm
from tensorflow.python.keras.utils import losses_utils
from tensorflow.python.util.tf_export import estimator_export
from tensorflow_estimator.python.estimator import estimator
from tensorflow_estimator.python.estimator.canned import head as head_lib
from tensorflow_estimator.python.estimator.canned import optimizers
from tensorflow_estimator.python.estimator.head import head_utils
from tensorflow_estimator.python.estimator.head import regression_head
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys

# The default learning rate of 0.05 is a historical artifact of the initial
# implementation, but seems a reasonable choice.
_LEARNING_RATE = 0.05


def _add_hidden_layer_summary(value, tag):
  tf.compat.v1.summary.scalar('%s/fraction_of_zero_values' % tag,
                              tf.math.zero_fraction(value))
  tf.compat.v1.summary.histogram('%s/activation' % tag, value)


@estimator_export(v1=['estimator.experimental.dnn_logit_fn_builder'])
def dnn_logit_fn_builder(units, hidden_units, feature_columns, activation_fn,
                         dropout, input_layer_partitioner, batch_norm):
  """Function builder for a dnn logit_fn.

  Args:
    units: An int indicating the dimension of the logit layer. In the MultiHead
      case, this should be the sum of all component Heads' logit dimensions.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer.
    batch_norm: Whether to use batch normalization after each hidden layer.

  Returns:
    A logit_fn (see below).

  Raises:
    ValueError: If units is not an int.
  """
  if not isinstance(units, six.integer_types):
    raise ValueError('units must be an int. Given type: {}'.format(
        type(units)))

  def dnn_logit_fn(features, mode):
    """Deep Neural Network logit_fn.

    Args:
      features: This is the first item returned from the `input_fn` passed to
        `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
        `dict` of same.
      mode: Optional. Specifies if this is training, evaluation or prediction.
        See `ModeKeys`.

    Returns:
      A `Tensor` representing the logits, or a list of `Tensor`'s representing
      multiple logits in the MultiHead case.
    """
    dnn_model = _DNNModel(
        units,
        hidden_units,
        feature_columns,
        activation_fn,
        dropout,
        input_layer_partitioner,
        batch_norm,
        name='dnn')
    return dnn_model(features, mode)

  return dnn_logit_fn


def dnn_logit_fn_builder_v2(units, hidden_units, feature_columns,
                            activation_fn, dropout, batch_norm):
  """Function builder for a dnn logit_fn.

  Args:
    units: An int indicating the dimension of the logit layer. In the MultiHead
      case, this should be the sum of all component Heads' logit dimensions.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    batch_norm: Whether to use batch normalization after each hidden layer.

  Returns:
    A logit_fn (see below).

  Raises:
    ValueError: If units is not an int.
  """
  if not isinstance(units, six.integer_types):
    raise ValueError('units must be an int. Given type: {}'.format(
        type(units)))

  def dnn_logit_fn(features, mode):
    """Deep Neural Network logit_fn.

    Args:
      features: This is the first item returned from the `input_fn` passed to
        `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
        `dict` of same.
      mode: Optional. Specifies if this is training, evaluation or prediction.
        See `ModeKeys`.

    Returns:
      A `Tensor` representing the logits, or a list of `Tensor`'s representing
      multiple logits in the MultiHead case.
    """
    dnn_model = _DNNModelV2(
        units,
        hidden_units,
        feature_columns,
        activation_fn,
        dropout,
        batch_norm,
        name='dnn')
    return dnn_model(features, mode)

  return dnn_logit_fn


def _get_previous_name_scope():
  current_name_scope = ops.get_name_scope()
  return current_name_scope.rsplit('/', 1)[0] + '/'


class _DNNModel(training.Model):
  """A DNN Model."""

  def __init__(self,
               units,
               hidden_units,
               feature_columns,
               activation_fn,
               dropout,
               input_layer_partitioner,
               batch_norm,
               name=None,
               **kwargs):
    super(_DNNModel, self).__init__(name=name, **kwargs)
    if feature_column_lib.is_feature_column_v2(feature_columns):
      self._input_layer = tf.compat.v1.keras.layers.DenseFeatures(
          feature_columns=feature_columns, name='input_layer')
    else:
      self._input_layer = feature_column.InputLayer(
          feature_columns=feature_columns,
          name='input_layer',
          create_scope_now=False)

    self._add_layer(self._input_layer, 'input_layer')

    self._dropout = dropout
    self._batch_norm = batch_norm

    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with tf.compat.v1.variable_scope(
          'hiddenlayer_%d' % layer_id) as hidden_layer_scope:
        hidden_layer = tf.compat.v1.layers.Dense(
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
            name=hidden_layer_scope,
            _scope=hidden_layer_scope)
        self._add_layer(hidden_layer, hidden_layer_scope.name)
        self._hidden_layer_scope_names.append(hidden_layer_scope.name)
        self._hidden_layers.append(hidden_layer)
        if self._dropout is not None:
          dropout_layer = tf.compat.v1.layers.Dropout(rate=self._dropout)
          self._add_layer(dropout_layer, dropout_layer.name)
          self._dropout_layers.append(dropout_layer)
        if self._batch_norm:
          batch_norm_layer = tf.compat.v1.layers.BatchNormalization(
              # The default momentum 0.99 actually crashes on certain
              # problems, so here we use 0.999, which is the default of
              # tf.contrib.layers.batch_norm.
              momentum=0.999,
              trainable=True,
              name='batchnorm_%d' % layer_id,
              _scope='batchnorm_%d' % layer_id)
          self._add_layer(batch_norm_layer, batch_norm_layer.name)
          self._batch_norm_layers.append(batch_norm_layer)

    with tf.compat.v1.variable_scope('logits') as logits_scope:
      self._logits_layer = tf.compat.v1.layers.Dense(
          units=units,
          activation=None,
          kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
          name=logits_scope,
          _scope=logits_scope)
      self._add_layer(self._logits_layer, logits_scope.name)
      self._logits_scope_name = logits_scope.name
    self._input_layer_partitioner = input_layer_partitioner

  def call(self, features, mode):
    is_training = mode == ModeKeys.TRAIN
    # The Keras training.Model adds a name_scope with the name of the model
    # which modifies the constructed graph. Hence we add another name_scope
    # here which is the one before the training.Model one was applied.
    # TODO(rohanj): Remove this in TF 2.0 (b/116728605)
    with ops.name_scope(name=_get_previous_name_scope()):
      # TODO(rohanj): Remove dependence on variable scope for partitioning.
      with tf.compat.v1.variable_scope(
          'input_from_feature_columns',
          partitioner=self._input_layer_partitioner):
        try:
          net = self._input_layer(features, training=is_training)
        except TypeError:
          net = self._input_layer(features)
      for i in range(len(self._hidden_layers)):
        net = self._hidden_layers[i](net)
        if self._dropout is not None and is_training:
          net = self._dropout_layers[i](net, training=True)
        if self._batch_norm:
          net = self._batch_norm_layers[i](net, training=is_training)
        _add_hidden_layer_summary(net, self._hidden_layer_scope_names[i])

      logits = self._logits_layer(net)
    _add_hidden_layer_summary(logits, self._logits_scope_name)
    return logits

  def _add_layer(self, layer, layer_name):
    # "Magic" required for keras.Model classes to track all the variables in
    # a list of layers.Layer objects.
    # TODO(ashankar): Figure out API so user code doesn't have to do this.
    setattr(self, layer_name, layer)


def _name_from_scope_name(name):
  """Returns the name of an op given the name of its scope.

  Args:
    name: the name of the scope.

  Returns:
    the name of the op (equal to scope name minus any trailing slash).
  """
  return name[:-1] if (name and name[-1] == '/') else name


class _DNNModelV2(training.Model):
  """A DNN Model."""

  def __init__(self,
               units,
               hidden_units,
               feature_columns,
               activation_fn,
               dropout,
               batch_norm,
               name=None,
               **kwargs):
    super(_DNNModelV2, self).__init__(name=name, **kwargs)
    with ops.name_scope(
        'input_from_feature_columns') as input_feature_column_scope:
      layer_name = input_feature_column_scope + 'input_layer'
      if feature_column_lib.is_feature_column_v2(feature_columns):
        self._input_layer = tf.compat.v2.keras.layers.DenseFeatures(
            feature_columns=feature_columns, name=layer_name)
      else:
        raise ValueError(
            'Received a feature column from TensorFlow v1, but this is a '
            'TensorFlow v2 Estimator. Please either use v2 feature columns '
            '(accessible via tf.feature_column.* in TF 2.x) with this '
            'Estimator, or switch to a v1 Estimator for use with v1 feature '
            'columns (accessible via tf.compat.v1.estimator.* and '
            'tf.compat.v1.feature_column.*, respectively).')

    self._dropout = dropout
    self._batch_norm = batch_norm

    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with ops.name_scope('hiddenlayer_%d' % layer_id) as hidden_layer_scope:
        # Get scope name without the trailing slash.
        hidden_shared_name = _name_from_scope_name(hidden_layer_scope)
        hidden_layer = keras_core.Dense(
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
            name=hidden_shared_name)
        self._hidden_layer_scope_names.append(hidden_shared_name)
        self._hidden_layers.append(hidden_layer)
        if self._dropout is not None:
          dropout_layer = keras_core.Dropout(rate=self._dropout)
          self._dropout_layers.append(dropout_layer)
        if self._batch_norm:
          batch_norm_name = hidden_shared_name + '/batchnorm_%d' % layer_id
          # TODO(scottzhu): Change back to use BatchNormalization when the
          # cleanup is done.
          batch_norm_layer = keras_norm.BatchNormalizationBase(
              # The default momentum 0.99 actually crashes on certain
              # problems, so here we use 0.999, which is the default of
              # tf.contrib.layers.batch_norm.
              momentum=0.999,
              trainable=True,
              name=batch_norm_name)
          self._batch_norm_layers.append(batch_norm_layer)

    with ops.name_scope('logits') as logits_scope:
      logits_shared_name = _name_from_scope_name(logits_scope)
      self._logits_layer = keras_core.Dense(
          units=units,
          activation=None,
          kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
          name=logits_shared_name)
      self._logits_scope_name = logits_shared_name

  def call(self, features, mode):
    is_training = mode == ModeKeys.TRAIN
    try:
      net = self._input_layer(features, training=is_training)
    except TypeError:
      net = self._input_layer(features)
    for i in range(len(self._hidden_layers)):
      net = self._hidden_layers[i](net)
      if self._dropout is not None and is_training:
        net = self._dropout_layers[i](net, training=True)
      if self._batch_norm:
        net = self._batch_norm_layers[i](net, training=is_training)
      _add_hidden_layer_summary(net, self._hidden_layer_scope_names[i])

    logits = self._logits_layer(net)
    _add_hidden_layer_summary(logits, self._logits_scope_name)
    return logits


def _validate_features(features):
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))


def _get_dnn_estimator_spec(use_tpu, head, features, labels, mode, logits,
                            optimizer):
  """Get EstimatorSpec for DNN Model."""
  if use_tpu:
    return head._create_tpu_estimator_spec(  # pylint: disable=protected-access
        features=features,
        mode=mode,
        labels=labels,
        optimizer=optimizer,
        logits=logits)
  else:
    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        optimizer=optimizer,
        logits=logits)


def _dnn_model_fn(features,
                  labels,
                  mode,
                  head,
                  hidden_units,
                  feature_columns,
                  optimizer='Adagrad',
                  activation_fn=tf.nn.relu,
                  dropout=None,
                  input_layer_partitioner=None,
                  config=None,
                  use_tpu=False,
                  batch_norm=False):
  """Deep Neural Net model_fn v1.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer.
      Defaults to `min_max_variable_partitioner` with `min_slice_size`
      64 << 20.
    config: `RunConfig` object to configure the runtime settings.
    use_tpu: Whether to make a DNN model able to run on TPU. Will make function
      return a `_TPUEstimatorSpec` instance and disable variable partitioning.
    batch_norm: Whether to use batch normalization after each hidden layer.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If features has the wrong type.
  """
  optimizer = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)

  _validate_features(features)

  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = (
      None if use_tpu else tf.compat.v1.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))
  with tf.compat.v1.variable_scope(
      'dnn', values=tuple(six.itervalues(features)), partitioner=partitioner):
    input_layer_partitioner = input_layer_partitioner or (
        None if use_tpu else tf.compat.v1.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20))

    logit_fn = dnn_logit_fn_builder(
        units=head.logits_dimension,
        hidden_units=hidden_units,
        feature_columns=feature_columns,
        activation_fn=activation_fn,
        dropout=dropout,
        input_layer_partitioner=input_layer_partitioner,
        batch_norm=batch_norm)
    logits = logit_fn(features=features, mode=mode)

    return _get_dnn_estimator_spec(use_tpu, head, features, labels, mode,
                                   logits, optimizer)


def _dnn_model_fn_builder_v2(units, hidden_units, feature_columns,
                             activation_fn, dropout, batch_norm, features,
                             mode):
  """Function builder for dnn logits, trainable variables and update ops.

  Args:
    units: An int indicating the dimension of the logit layer. In the MultiHead
      case, this should be the sum of all component Heads' logit dimensions.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    batch_norm: Whether to use batch normalization after each hidden layer.
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.
    mode: Optional. Specifies if this is training, evaluation or prediction.
      See `ModeKeys`.

  Returns:
    A `Tensor` representing the logits, or a list of `Tensor`'s representing
    multiple logits in the MultiHead case.
    A list of trainable variables.
    A list of update ops.

  Raises:
    ValueError: If units is not an int.
  """
  if not isinstance(units, six.integer_types):
    raise ValueError('units must be an int. Given type: {}'.format(
        type(units)))

  dnn_model = _DNNModelV2(
      units,
      hidden_units,
      feature_columns,
      activation_fn,
      dropout,
      batch_norm,
      name='dnn')
  logits = dnn_model(features, mode)
  trainable_variables = dnn_model.trainable_variables
  update_ops = dnn_model.updates

  return logits, trainable_variables, update_ops
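

# Usage sketch (illustrative only, not part of this module's API):
# `dnn_logit_fn_builder_v2` above can be used directly inside a hand-written
# `model_fn` when only the logits are needed. The feature column, unit counts,
# and `my_model_fn` name below are hypothetical placeholders.
#
#   def my_model_fn(features, labels, mode):
#     logit_fn = dnn_logit_fn_builder_v2(
#         units=3,
#         hidden_units=[128, 64],
#         feature_columns=[tf.feature_column.numeric_column('x', shape=[10])],
#         activation_fn=tf.nn.relu,
#         dropout=0.2,
#         batch_norm=False)
#     logits = logit_fn(features=features, mode=mode)
#     # ... build an EstimatorSpec from `logits` ...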


def dnn_model_fn_v2(features,
                    labels,
                    mode,
                    head,
                    hidden_units,
                    feature_columns,
                    optimizer='Adagrad',
                    activation_fn=tf.nn.relu,
                    dropout=None,
                    config=None,
                    use_tpu=False,
                    batch_norm=False):
  """Deep Neural Net model_fn v2.

  This function differs from `_dnn_model_fn` (v1) in the way it handles the
  optimizer when a String optimizer name is passed.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `base_head.Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.keras.optimizers.Optimizer` object, or callable that
      creates the optimizer to use for training. If not specified, will use the
      Adagrad optimizer. If it is a String, the default learning rate of the
      optimizer will be used. If it is a String and the optimizer does not have
      a default learning rate, then a fixed learning rate of 0.05 is used.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    config: `RunConfig` object to configure the runtime settings.
    use_tpu: Whether to make a DNN model able to run on TPU. Will make function
      return a `_TPUEstimatorSpec` instance and disable variable partitioning.
    batch_norm: Whether to use batch normalization after each hidden layer.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If features has the wrong type.
  """
  _validate_features(features)
  del config

  logits, trainable_variables, update_ops = _dnn_model_fn_builder_v2(
      units=head.logits_dimension,
      hidden_units=hidden_units,
      feature_columns=feature_columns,
      activation_fn=activation_fn,
      dropout=dropout,
      batch_norm=batch_norm,
      features=features,
      mode=mode)

  # In TRAIN mode, create optimizer and assign global_step variable to
  # optimizer.iterations to make global_step increase correctly, as Hooks
  # rely on global step as step counter.
  if mode == ModeKeys.TRAIN:
    optimizer = optimizers.get_optimizer_instance_v2(optimizer)
    optimizer.iterations = tf.compat.v1.train.get_or_create_global_step()

  # Create EstimatorSpec.
  if use_tpu:
    estimator_spec_fn = head._create_tpu_estimator_spec  # pylint: disable=protected-access
  else:
    estimator_spec_fn = head.create_estimator_spec  # pylint: disable=protected-access

  return estimator_spec_fn(
      features=features,
      mode=mode,
      labels=labels,
      optimizer=optimizer,
      logits=logits,
      trainable_variables=trainable_variables,
      update_ops=update_ops)
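

# Usage sketch (illustrative only, not part of this module's API):
# `dnn_model_fn_v2` can back a custom `tf.estimator.Estimator` when the canned
# estimators below are not flexible enough. The head, feature column, hidden
# units, and `my_model_fn` name here are hypothetical placeholders.
#
#   def my_model_fn(features, labels, mode, config):
#     return dnn_model_fn_v2(
#         features=features,
#         labels=labels,
#         mode=mode,
#         head=regression_head.RegressionHead(label_dimension=1),
#         hidden_units=[64, 32],
#         feature_columns=[tf.feature_column.numeric_column('x')],
#         optimizer='Adagrad',
#         config=config)
#
#   my_estimator = tf.estimator.Estimator(model_fn=my_model_fn)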


@estimator_export('estimator.DNNClassifier', v1=[])
class DNNClassifierV2(estimator.EstimatorV2):
  """A classifier for TensorFlow DNN models.

  Example:

  ```python
  categorical_feature_a = categorical_column_with_hash_bucket(...)
  categorical_feature_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_emb = embedding_column(
      categorical_column=categorical_feature_a, ...)
  categorical_feature_b_emb = embedding_column(
      categorical_column=categorical_feature_b, ...)

  estimator = tf.estimator.DNNClassifier(
      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
      hidden_units=[1024, 512, 256])

  # Or estimator using the ProximalAdagradOptimizer optimizer with
  # regularization.
  estimator = tf.estimator.DNNClassifier(
      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
      hidden_units=[1024, 512, 256],
      optimizer=tf.compat.v1.train.ProximalAdagradOptimizer(
          learning_rate=0.1,
          l1_regularization_strength=0.001
      ))

  # Or estimator using an optimizer with a learning rate decay.
  estimator = tf.estimator.DNNClassifier(
      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
      hidden_units=[1024, 512, 256],
      optimizer=lambda: tf.keras.optimizers.Adam(
          learning_rate=tf.compat.v1.train.exponential_decay(
              learning_rate=0.1,
              global_step=tf.compat.v1.train.get_global_step(),
              decay_steps=10000,
              decay_rate=0.96)))

  # Or estimator with warm-starting from a previous checkpoint.
  estimator = tf.estimator.DNNClassifier(
      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
      hidden_units=[1024, 512, 256],
      warm_start_from="/path/to/checkpoint/dir")

  # Input builders
  def input_fn_train():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_eval():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_predict():
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train)
  metrics = estimator.evaluate(input_fn=input_fn_eval)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have the following features,
  otherwise there will be a `KeyError`:

  * if `weight_column` is not `None`, a feature with `key=weight_column` whose
    value is a `Tensor`.
  * for each `column` in `feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss is calculated by using softmax cross entropy.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(
      self,
      hidden_units,
      feature_columns,
      model_dir=None,
      n_classes=2,
      weight_column=None,
      label_vocabulary=None,
      optimizer='Adagrad',
      activation_fn=tf.nn.relu,
      dropout=None,
      config=None,
      warm_start_from=None,
      loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
      batch_norm=False,
  ):
    """Initializes a `DNNClassifier` instance.

    Args:
      hidden_units: Iterable of number of hidden units per layer. All layers
        are fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
        second one has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `_FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can also
        be used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training.
        It will be multiplied by the loss of the example. If it is a string, it
        is used as a key to fetch weight tensor from the `features`. If it is a
        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
        then weight_column.normalizer_fn is applied on it to get weight tensor.
      label_vocabulary: A list of strings representing possible label values.
        If given, labels must be string type and have any value in
        `label_vocabulary`. If it is not given, that means labels are already
        encoded as integer or float within [0, 1] for `n_classes=2` and encoded
        as integer values in {0, 1,..., n_classes-1} for `n_classes`>2. Also
        there will be errors if vocabulary is not provided and labels are
        string.
      optimizer: An instance of `tf.keras.optimizers.*` used to train the
        model. Can also be a string (one of 'Adagrad', 'Adam', 'Ftrl',
        'RMSProp', 'SGD'), or callable. Defaults to Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      config: `RunConfig` object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting. If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and
        Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
      batch_norm: Whether to use batch normalization after each hidden layer.
    """
    head = head_utils.binary_or_multi_class_head(
        n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Classifier').set('DNN')

    def _model_fn(features, labels, mode, config):
      """Call the defined shared dnn_model_fn_v2."""
      return dnn_model_fn_v2(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          activation_fn=activation_fn,
          dropout=dropout,
          config=config,
          batch_norm=batch_norm)

    super(DNNClassifierV2, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@estimator_export(v1=['estimator.DNNClassifier'])  # pylint: disable=missing-docstring
class DNNClassifier(estimator.Estimator):
  __doc__ = DNNClassifierV2.__doc__.replace('SUM_OVER_BATCH_SIZE', 'SUM')

  def __init__(
      self,
      hidden_units,
      feature_columns,
      model_dir=None,
      n_classes=2,
      weight_column=None,
      label_vocabulary=None,
      optimizer='Adagrad',
      activation_fn=tf.nn.relu,
      dropout=None,
      input_layer_partitioner=None,
      config=None,
      warm_start_from=None,
      loss_reduction=tf.compat.v1.losses.Reduction.SUM,
      batch_norm=False,
  ):
    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes, weight_column, label_vocabulary, loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Classifier').set('DNN')

    def _model_fn(features, labels, mode, config):
      """Call the defined shared dnn_model_fn."""
      return _dnn_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          activation_fn=activation_fn,
          dropout=dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config,
          batch_norm=batch_norm)

    super(DNNClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)
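

# Usage sketch (illustrative only, not part of this module's API): a concrete
# `input_fn` for the docstring stubs above, assuming hypothetical in-memory
# numpy arrays `x_train` (features) and `y_train` (integer class labels).
#
#   def input_fn_train():
#     dataset = tf.data.Dataset.from_tensor_slices(({'x': x_train}, y_train))
#     return dataset.shuffle(1000).batch(128).repeat()
#
#   classifier = tf.estimator.DNNClassifier(
#       feature_columns=[tf.feature_column.numeric_column('x')],
#       hidden_units=[32, 16],
#       n_classes=3)
#   classifier.train(input_fn=input_fn_train, steps=1000)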


@estimator_export('estimator.DNNEstimator', v1=[])
class DNNEstimatorV2(estimator.EstimatorV2):
  """An estimator for TensorFlow DNN models with user-specified head.

  Example:

  ```python
  sparse_feature_a = sparse_column_with_hash_bucket(...)
  sparse_feature_b = sparse_column_with_hash_bucket(...)

  sparse_feature_a_emb = embedding_column(sparse_id_column=sparse_feature_a,
                                          ...)
  sparse_feature_b_emb = embedding_column(sparse_id_column=sparse_feature_b,
                                          ...)

  estimator = tf.estimator.DNNEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
      hidden_units=[1024, 512, 256])

  # Or estimator using the ProximalAdagradOptimizer optimizer with
  # regularization.
  estimator = tf.estimator.DNNEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
      hidden_units=[1024, 512, 256],
      optimizer=tf.compat.v1.train.ProximalAdagradOptimizer(
          learning_rate=0.1,
          l1_regularization_strength=0.001
      ))

  # Or estimator using an optimizer with a learning rate decay.
  estimator = tf.estimator.DNNEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
      hidden_units=[1024, 512, 256],
      optimizer=lambda: tf.keras.optimizers.Adam(
          learning_rate=tf.compat.v1.train.exponential_decay(
              learning_rate=0.1,
              global_step=tf.compat.v1.train.get_global_step(),
              decay_steps=10000,
              decay_rate=0.96)))

  # Or estimator with warm-starting from a previous checkpoint.
  estimator = tf.estimator.DNNEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
      hidden_units=[1024, 512, 256],
      warm_start_from="/path/to/checkpoint/dir")

  # Input builders
  def input_fn_train():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_eval():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_predict():
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train)
  metrics = estimator.evaluate(input_fn=input_fn_eval)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have the following features,
  otherwise there will be a `KeyError`:

  * if `weight_column` is not `None`, a feature with `key=weight_column` whose
    value is a `Tensor`.
  * for each `column` in `feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss and predicted output are determined by the specified head.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(self,
               head,
               hidden_units,
               feature_columns,
               model_dir=None,
               optimizer='Adagrad',
               activation_fn=tf.nn.relu,
               dropout=None,
               config=None,
               warm_start_from=None,
               batch_norm=False):
    """Initializes a `DNNEstimator` instance.

    Args:
      head: A `_Head` instance constructed with a method such as
        `tf.contrib.estimator.multi_label_head`.
      hidden_units: Iterable of number of hidden units per layer. All layers
        are fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
        second one has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `_FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can also
        be used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      optimizer: An instance of `tf.keras.optimizers.*` used to train the
        model. Can also be a string (one of 'Adagrad', 'Adam', 'Ftrl',
        'RMSProp', 'SGD'), or callable. Defaults to Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      config: `RunConfig` object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting. If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and
        Tensor names are unchanged.
      batch_norm: Whether to use batch normalization after each hidden layer.
    """

    def _model_fn(features, labels, mode, config):
      """Call the defined shared dnn_model_fn_v2."""
      return dnn_model_fn_v2(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          activation_fn=activation_fn,
          dropout=dropout,
          config=config,
          batch_norm=batch_norm)

    estimator._canned_estimator_api_gauge.get_cell('Estimator').set('DNN')  # pylint: disable=protected-access
    super(DNNEstimatorV2, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@estimator_export(v1=['estimator.DNNEstimator'])  # pylint: disable=missing-docstring
class DNNEstimator(estimator.Estimator):
  __doc__ = DNNEstimatorV2.__doc__

  def __init__(self,
               head,
               hidden_units,
               feature_columns,
               model_dir=None,
               optimizer='Adagrad',
               activation_fn=tf.nn.relu,
               dropout=None,
               input_layer_partitioner=None,
               config=None,
               warm_start_from=None,
               batch_norm=False):

    def _model_fn(features, labels, mode, config):
      """Call the defined shared _dnn_model_fn."""
      return _dnn_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          activation_fn=activation_fn,
          dropout=dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config,
          batch_norm=batch_norm)

    estimator._canned_estimator_api_gauge.get_cell('Estimator').set('DNN')  # pylint: disable=protected-access
    super(DNNEstimator, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@estimator_export('estimator.DNNRegressor', v1=[])
class DNNRegressorV2(estimator.EstimatorV2):
  """A regressor for TensorFlow DNN models.

  Example:

  ```python
  categorical_feature_a = categorical_column_with_hash_bucket(...)
  categorical_feature_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_emb = embedding_column(
      categorical_column=categorical_feature_a, ...)
  categorical_feature_b_emb = embedding_column(
      categorical_column=categorical_feature_b, ...)

  estimator = tf.estimator.DNNRegressor(
      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
      hidden_units=[1024, 512, 256])

  # Or estimator using the ProximalAdagradOptimizer optimizer with
  # regularization.
  estimator = tf.estimator.DNNRegressor(
      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
      hidden_units=[1024, 512, 256],
      optimizer=tf.compat.v1.train.ProximalAdagradOptimizer(
          learning_rate=0.1,
          l1_regularization_strength=0.001
      ))

  # Or estimator using an optimizer with a learning rate decay.
  estimator = tf.estimator.DNNRegressor(
      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
      hidden_units=[1024, 512, 256],
      optimizer=lambda: tf.keras.optimizers.Adam(
          learning_rate=tf.compat.v1.train.exponential_decay(
              learning_rate=0.1,
              global_step=tf.compat.v1.train.get_global_step(),
              decay_steps=10000,
              decay_rate=0.96)))

  # Or estimator with warm-starting from a previous checkpoint.
  estimator = tf.estimator.DNNRegressor(
      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
      hidden_units=[1024, 512, 256],
      warm_start_from="/path/to/checkpoint/dir")

  # Input builders
  def input_fn_train():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_eval():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_predict():
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train)
  metrics = estimator.evaluate(input_fn=input_fn_eval)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have the following features,
  otherwise there will be a `KeyError`:

  * if `weight_column` is not `None`, a feature with `key=weight_column` whose
    value is a `Tensor`.
  * for each `column` in `feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss is calculated by using mean squared error.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(
      self,
      hidden_units,
      feature_columns,
      model_dir=None,
      label_dimension=1,
      weight_column=None,
      optimizer='Adagrad',
      activation_fn=tf.nn.relu,
      dropout=None,
      config=None,
      warm_start_from=None,
      loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
      batch_norm=False,
  ):
    """Initializes a `DNNRegressor` instance.

    Args:
      hidden_units: Iterable of number of hidden units per layer. All layers
        are fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
        second one has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can also
        be used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      label_dimension: Number of regression targets per example. This is the
        size of the last dimension of the labels and logits `Tensor` objects
        (typically, these have shape `[batch_size, label_dimension]`).
      weight_column: A string or a `NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training.
        It will be multiplied by the loss of the example. If it is a string, it
        is used as a key to fetch weight tensor from the `features`.
        If it is a `NumericColumn`, raw tensor is fetched by key
        `weight_column.key`, then weight_column.normalizer_fn is applied on it
        to get weight tensor.
      optimizer: An instance of `tf.keras.optimizers.*` used to train the
        model. Can also be a string (one of 'Adagrad', 'Adam', 'Ftrl',
        'RMSProp', 'SGD'), or callable. Defaults to Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      config: `RunConfig` object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting. If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and
        Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
      batch_norm: Whether to use batch normalization after each hidden layer.
    """
    head = regression_head.RegressionHead(
        label_dimension=label_dimension,
        weight_column=weight_column,
        loss_reduction=loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Regressor').set('DNN')  # pylint: disable=protected-access

    def _model_fn(features, labels, mode, config):
      """Call the defined shared dnn_model_fn_v2."""
      return dnn_model_fn_v2(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          activation_fn=activation_fn,
          dropout=dropout,
          config=config,
          batch_norm=batch_norm)

    super(DNNRegressorV2, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@estimator_export(v1=['estimator.DNNRegressor'])  # pylint: disable=missing-docstring
class DNNRegressor(estimator.Estimator):
  __doc__ = DNNRegressorV2.__doc__.replace('SUM_OVER_BATCH_SIZE', 'SUM')

  def __init__(
      self,
      hidden_units,
      feature_columns,
      model_dir=None,
      label_dimension=1,
      weight_column=None,
      optimizer='Adagrad',
      activation_fn=tf.nn.relu,
      dropout=None,
      input_layer_partitioner=None,
      config=None,
      warm_start_from=None,
      loss_reduction=tf.compat.v1.losses.Reduction.SUM,
      batch_norm=False,
  ):
    head = head_lib._regression_head(  # pylint: disable=protected-access
        label_dimension=label_dimension,
        weight_column=weight_column,
        loss_reduction=loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Regressor').set('DNN')  # pylint: disable=protected-access

    def _model_fn(features, labels, mode, config):
      """Call the defined shared _dnn_model_fn."""
      return _dnn_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          activation_fn=activation_fn,
          dropout=dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config,
          batch_norm=batch_norm)

    super(DNNRegressor, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)
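

# End-to-end usage sketch (illustrative only, not part of this module's API):
# `x_train` and `y_train` below are hypothetical in-memory numpy arrays.
#
#   feature_cols = [tf.feature_column.numeric_column('x', shape=[8])]
#   regressor = tf.estimator.DNNRegressor(
#       feature_columns=feature_cols, hidden_units=[64, 32], dropout=0.1)
#
#   def input_fn():
#     ds = tf.data.Dataset.from_tensor_slices(({'x': x_train}, y_train))
#     return ds.shuffle(1000).batch(32).repeat()
#
#   regressor.train(input_fn=input_fn, steps=1000)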