# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TensorFlow estimators for Linear and DNN joined training models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import six
import tensorflow as tf

from tensorflow.python.keras.utils import losses_utils
from tensorflow.python.util.tf_export import estimator_export
from tensorflow_estimator.python.estimator import estimator
from tensorflow_estimator.python.estimator.canned import dnn
from tensorflow_estimator.python.estimator.canned import head as head_lib
from tensorflow_estimator.python.estimator.canned import linear
from tensorflow_estimator.python.estimator.canned import optimizers
from tensorflow_estimator.python.estimator.head import head_utils
from tensorflow_estimator.python.estimator.head import regression_head
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys

# The default learning rates are a historical artifact of the initial
# implementation.
_DNN_LEARNING_RATE = 0.001
_LINEAR_LEARNING_RATE = 0.005


def _check_no_sync_replicas_optimizer(optimizer):
  if isinstance(optimizer, tf.compat.v1.train.SyncReplicasOptimizer):
    raise ValueError(
        'SyncReplicasOptimizer does not support multi optimizers case. '
        'Therefore, it is not supported in DNNLinearCombined model. '
        'If you want to use this optimizer, please use either DNN or Linear '
        'model.')


def _linear_learning_rate(num_linear_feature_columns):
  """Returns the default learning rate of the linear model.

  The calculation is a historical artifact of the initial implementation, but
  has proven a reasonable choice.

  Args:
    num_linear_feature_columns: The number of feature columns of the linear
      model.

  Returns:
    A float.
  """
  default_learning_rate = 1. / math.sqrt(num_linear_feature_columns)
  return min(_LINEAR_LEARNING_RATE, default_learning_rate)
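
# Added illustration (editorial comment, not used anywhere in this module):
# a linear model with 25 feature columns gets
# min(0.005, 1/sqrt(25)) = min(0.005, 0.2) = 0.005, while one with 100,000
# columns gets roughly 1/sqrt(100000) ~= 0.0032.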


def _add_layer_summary(value, tag):
  tf.compat.v1.summary.scalar('%s/fraction_of_zero_values' % tag,
                              tf.math.zero_fraction(value))
  tf.compat.v1.summary.histogram('%s/activation' % tag, value)


def _validate_feature_columns(linear_feature_columns, dnn_feature_columns):
  """Validates feature columns for DNNLinearCombined estimators."""
  linear_feature_columns = linear_feature_columns or []
  dnn_feature_columns = dnn_feature_columns or []
  feature_columns = (list(linear_feature_columns) + list(dnn_feature_columns))
  if not feature_columns:
    raise ValueError('Either linear_feature_columns or dnn_feature_columns '
                     'must be defined.')
  return feature_columns


def _dnn_linear_combined_model_fn_v2(
    features,
    labels,
    mode,
    head,
    linear_feature_columns=None,
    linear_optimizer='Ftrl',
    dnn_feature_columns=None,
    dnn_optimizer='Adagrad',
    dnn_hidden_units=None,
    dnn_activation_fn=tf.nn.relu,
    dnn_dropout=None,
    config=None,
    batch_norm=False,
    linear_sparse_combiner='sum',
    loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns used
      by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used by
      the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If
      `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given DNN
      coordinate.
    config: `RunConfig` object to configure the runtime settings.
    batch_norm: Whether to use batch normalization after each hidden layer.
    linear_sparse_combiner: A string specifying how to reduce the linear model
      if a categorical column is multivalent. One of "mean", "sqrtn", and
      "sum".
    loss_reduction: One of `tf.keras.losses.Reduction` except `NONE`. Describes
      how to reduce training loss over batch. Defaults to
      `SUM_OVER_BATCH_SIZE`.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns` are
      empty at the same time, or if `features` has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be '
        'defined.')
  del config
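
  # Added overview comment: the remainder of this function
  # (1) builds the DNN logits and, in TRAIN mode, a v2 (Keras-style) DNN
  #     optimizer,
  # (2) builds the linear logits and a linear optimizer the same way,
  # (3) sums whichever logits exist into the combined logits, and
  # (4) defines _train_op_fn, which applies each optimizer only to its own
  #     tower's trainable variables and groups the resulting update ops.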

  # Build DNN Logits.
  if not dnn_feature_columns:
    dnn_logits = None
  else:
    if mode == ModeKeys.TRAIN:
      dnn_optimizer = optimizers.get_optimizer_instance_v2(
          dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
      _check_no_sync_replicas_optimizer(dnn_optimizer)

    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')

    dnn_logits, dnn_trainable_variables, dnn_update_ops = (
        dnn._dnn_model_fn_builder_v2(  # pylint: disable=protected-access
            units=head.logits_dimension,
            hidden_units=dnn_hidden_units,
            feature_columns=dnn_feature_columns,
            activation_fn=dnn_activation_fn,
            dropout=dnn_dropout,
            batch_norm=batch_norm,
            features=features,
            mode=mode))

  if not linear_feature_columns:
    linear_logits = None
  else:
    if mode == ModeKeys.TRAIN:
      linear_optimizer = optimizers.get_optimizer_instance_v2(
          linear_optimizer,
          learning_rate=_linear_learning_rate(len(linear_feature_columns)))
      _check_no_sync_replicas_optimizer(linear_optimizer)

    linear_logits, linear_trainable_variables = (
        linear._linear_model_fn_builder_v2(  # pylint: disable=protected-access
            units=head.logits_dimension,
            feature_columns=linear_feature_columns,
            sparse_combiner=linear_sparse_combiner,
            features=features))
    _add_layer_summary(linear_logits, 'linear')

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    # Scale loss by number of replicas.
    if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
      loss = losses_utils.scale_loss_for_distribution(loss)

    if dnn_logits is not None:
      train_ops.extend(
          dnn_optimizer.get_updates(loss, dnn_trainable_variables))
      if dnn_update_ops is not None:
        train_ops.extend(dnn_update_ops)
    if linear_logits is not None:
      train_ops.extend(
          linear_optimizer.get_updates(loss, linear_trainable_variables))

    train_op = tf.group(*train_ops)
    return train_op

  # In TRAIN mode, assign the global_step variable to optimizer.iterations so
  # that global_step increments correctly, as hooks rely on the global step as
  # a step counter. Note that only one model's optimizer needs this
  # assignment.
  if mode == ModeKeys.TRAIN:
    if dnn_logits is not None:
      dnn_optimizer.iterations = tf.compat.v1.train.get_or_create_global_step()
    else:
      linear_optimizer.iterations = \
          tf.compat.v1.train.get_or_create_global_step()

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
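

# Added note: the v1 variant below implements the same wide-and-deep
# combination as _dnn_linear_combined_model_fn_v2 but targets graph-mode
# tf.compat.v1 optimizers: variables are created under partitioned variable
# scopes ('dnn' and 'linear'), each optimizer minimizes only the variables
# collected from its own scope, and the global step is incremented explicitly
# with an assign_add that depends on the grouped train ops.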


def _dnn_linear_combined_model_fn(features,
                                  labels,
                                  mode,
                                  head,
                                  linear_feature_columns=None,
                                  linear_optimizer='Ftrl',
                                  dnn_feature_columns=None,
                                  dnn_optimizer='Adagrad',
                                  dnn_hidden_units=None,
                                  dnn_activation_fn=tf.nn.relu,
                                  dnn_dropout=None,
                                  input_layer_partitioner=None,
                                  config=None,
                                  batch_norm=False,
                                  linear_sparse_combiner='sum'):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns used
      by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used by
      the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If
      `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given DNN
      coordinate.
    input_layer_partitioner: Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.
    batch_norm: Whether to use batch normalization after each hidden layer.
    linear_sparse_combiner: A string specifying how to reduce the linear model
      if a categorical column is multivalent. One of "mean", "sqrtn", and
      "sum".

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns` are
      empty at the same time, or `input_layer_partitioner` is missing, or
      `features` has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be '
        'defined.')

  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      tf.compat.v1.min_max_variable_partitioner(
          max_partitions=num_ps_replicas, min_slice_size=64 << 20))

  # Build DNN Logits.
  dnn_parent_scope = 'dnn'

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = (
        tf.compat.v1.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with tf.compat.v1.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner) as scope:
      dnn_absolute_scope = scope.name
      dnn_logit_fn = dnn.dnn_logit_fn_builder(
          units=head.logits_dimension,
          hidden_units=dnn_hidden_units,
          feature_columns=dnn_feature_columns,
          activation_fn=dnn_activation_fn,
          dropout=dnn_dropout,
          batch_norm=batch_norm,
          input_layer_partitioner=input_layer_partitioner)
      dnn_logits = dnn_logit_fn(features=features, mode=mode)

  linear_parent_scope = 'linear'

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with tf.compat.v1.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      linear_absolute_scope = scope.name
      logit_fn = linear.linear_logit_fn_builder(
          units=head.logits_dimension,
          feature_columns=linear_feature_columns,
          sparse_combiner=linear_sparse_combiner)
      linear_logits = logit_fn(features=features)
      _add_layer_summary(linear_logits, scope.name)
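
  # Added note on `sparse_combiner` (illustrative, following the semantics of
  # tf.feature_column.linear_model): if an example activates three ids in one
  # multivalent categorical column, "sum" adds the three learned weights,
  # "mean" divides that sum by 3, and "sqrtn" divides it by sqrt(3).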

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    global_step = tf.compat.v1.train.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          dnn_optimizer.minimize(
              loss,
              var_list=tf.compat.v1.get_collection(
                  tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES,
                  scope=dnn_absolute_scope)))
    if linear_logits is not None:
      train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=tf.compat.v1.get_collection(
                  tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_absolute_scope)))

    train_op = tf.group(*train_ops)
    with tf.control_dependencies([train_op]):
      return tf.compat.v1.assign_add(global_step, 1).op

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)


@estimator_export('estimator.DNNLinearCombinedClassifier', v1=[])
class DNNLinearCombinedClassifierV2(estimator.EstimatorV2):
  """An estimator for TensorFlow Linear and DNN joined classification models.

  Note: This estimator is also known as wide-n-deep.

  Example:

  ```python
  numeric_feature = numeric_column(...)
  categorical_feature_a = categorical_column_with_hash_bucket(...)
  categorical_feature_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
  categorical_feature_a_emb = embedding_column(
      categorical_column=categorical_feature_a, ...)
  categorical_feature_b_emb = embedding_column(
      categorical_column=categorical_feature_b, ...)

  estimator = tf.estimator.DNNLinearCombinedClassifier(
      # wide settings
      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
      linear_optimizer=tf.keras.optimizers.Ftrl(...),
      # deep settings
      dnn_feature_columns=[
          categorical_feature_a_emb, categorical_feature_b_emb,
          numeric_feature],
      dnn_hidden_units=[1000, 500, 100],
      dnn_optimizer=tf.keras.optimizers.Adagrad(...),
      # warm-start settings
      warm_start_from="/path/to/checkpoint/dir")

  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
  tf.compat.v1.train.ProximalAdagradOptimizer(
      learning_rate=0.1,
      l1_regularization_strength=0.001,
      l2_regularization_strength=0.001)
  # To apply learning rate decay, you can set dnn_optimizer to a callable:
  lambda: tf.keras.optimizers.Adam(
      learning_rate=tf.compat.v1.train.exponential_decay(
          learning_rate=0.1,
          global_step=tf.compat.v1.train.get_global_step(),
          decay_steps=10000,
          decay_rate=0.96))
  # It is the same for linear_optimizer.

  # Input builders
  def input_fn_train():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_eval():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_predict():
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train, steps=100)
  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have the following features, otherwise
  there will be a `KeyError`:

  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss is calculated by using softmax cross entropy.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(self,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=tf.nn.relu,
               dnn_dropout=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               config=None,
               warm_start_from=None,
               loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               batch_norm=False,
               linear_sparse_combiner='sum'):
    """Initializes a DNNLinearCombinedClassifier instance.

    Args:
      model_dir: Directory to save model parameters, graph, etc. This can also
        be used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      linear_feature_columns: An iterable containing all the feature columns
        used by the linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf.keras.optimizers.*` used to apply
        gradients to the linear part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        the FTRL optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used
        by the deep part of the model. All items in the set must be instances
        of classes derived from `FeatureColumn`.
      dnn_optimizer: An instance of `tf.keras.optimizers.*` used to apply
        gradients to the deep part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        the Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If None,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out a given
        coordinate.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training.
        It will be multiplied by the loss of the example. If it is a string, it
        is used as a key to fetch the weight tensor from the `features`. If it
        is a `NumericColumn`, the raw tensor is fetched by key
        `weight_column.key`, then `weight_column.normalizer_fn` is applied on
        it to get the weight tensor.
      label_vocabulary: A list of strings representing possible label values.
        If given, labels must be of string type and have any value in
        `label_vocabulary`. If it is not given, that means labels are already
        encoded as an integer or float within [0, 1] for `n_classes=2` and
        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2.
        An error will also be raised if the vocabulary is not provided and the
        labels are strings.
      config: RunConfig object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting. If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and
        Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
      batch_norm: Whether to use batch normalization after each hidden layer.
      linear_sparse_combiner: A string specifying how to reduce the linear
        model if a categorical column is multivalent. One of "mean", "sqrtn",
        and "sum" -- these are effectively different ways to do example-level
        normalization, which can be useful for bag-of-words features. For more
        details, see `tf.feature_column.linear_model`.

    Raises:
      ValueError: If both linear_feature_columns and dnn_feature_columns are
        empty at the same time.
    """
    self._feature_columns = _validate_feature_columns(
        linear_feature_columns=linear_feature_columns,
        dnn_feature_columns=dnn_feature_columns)

    head = head_utils.binary_or_multi_class_head(
        n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Classifier').set(  # pylint: disable=protected-access
        'DNNLinearCombined')

    def _model_fn(features, labels, mode, config):
      """Call the _dnn_linear_combined_model_fn."""
      return _dnn_linear_combined_model_fn_v2(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          linear_feature_columns=linear_feature_columns,
          linear_optimizer=linear_optimizer,
          dnn_feature_columns=dnn_feature_columns,
          dnn_optimizer=dnn_optimizer,
          dnn_hidden_units=dnn_hidden_units,
          dnn_activation_fn=dnn_activation_fn,
          dnn_dropout=dnn_dropout,
          config=config,
          batch_norm=batch_norm,
          linear_sparse_combiner=linear_sparse_combiner,
          loss_reduction=loss_reduction)

    super(DNNLinearCombinedClassifierV2, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@estimator_export(v1=['estimator.DNNLinearCombinedClassifier'])  # pylint: disable=missing-docstring
class DNNLinearCombinedClassifier(estimator.Estimator):
  __doc__ = DNNLinearCombinedClassifierV2.__doc__.replace(
      'SUM_OVER_BATCH_SIZE', 'SUM')

  def __init__(self,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=tf.nn.relu,
               dnn_dropout=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               input_layer_partitioner=None,
               config=None,
               warm_start_from=None,
               loss_reduction=tf.compat.v1.losses.Reduction.SUM,
               batch_norm=False,
               linear_sparse_combiner='sum'):
    self._feature_columns = _validate_feature_columns(
        linear_feature_columns=linear_feature_columns,
        dnn_feature_columns=dnn_feature_columns)

    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes, weight_column, label_vocabulary, loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Classifier').set(
        'DNNLinearCombined')  # pylint: disable=protected-access

    def _model_fn(features, labels, mode, config):
      """Call the _dnn_linear_combined_model_fn."""
      return _dnn_linear_combined_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          linear_feature_columns=linear_feature_columns,
          linear_optimizer=linear_optimizer,
          dnn_feature_columns=dnn_feature_columns,
          dnn_optimizer=dnn_optimizer,
          dnn_hidden_units=dnn_hidden_units,
          dnn_activation_fn=dnn_activation_fn,
          dnn_dropout=dnn_dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config,
          batch_norm=batch_norm,
          linear_sparse_combiner=linear_sparse_combiner)

    super(DNNLinearCombinedClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


def _init_dnn_linear_combined_estimator(head, linear_feature_columns,
                                        linear_optimizer, dnn_feature_columns,
                                        dnn_optimizer, dnn_hidden_units,
                                        dnn_activation_fn, dnn_dropout,
                                        input_layer_partitioner,
                                        linear_sparse_combiner):
  """Helper function for the initialization of DNNLinearCombinedEstimator."""
  linear_feature_columns = linear_feature_columns or []
  dnn_feature_columns = dnn_feature_columns or []
  feature_columns = (list(linear_feature_columns) + list(dnn_feature_columns))
  if not feature_columns:
    raise ValueError('Either linear_feature_columns or dnn_feature_columns '
                     'must be defined.')

  def _model_fn(features, labels, mode, config):
    """Call the _dnn_linear_combined_model_fn."""
    return _dnn_linear_combined_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        linear_feature_columns=linear_feature_columns,
        linear_optimizer=linear_optimizer,
        dnn_feature_columns=dnn_feature_columns,
        dnn_optimizer=dnn_optimizer,
        dnn_hidden_units=dnn_hidden_units,
        dnn_activation_fn=dnn_activation_fn,
        dnn_dropout=dnn_dropout,
        input_layer_partitioner=input_layer_partitioner,
        config=config,
        linear_sparse_combiner=linear_sparse_combiner)

  return feature_columns, _model_fn


@estimator_export('estimator.DNNLinearCombinedEstimator', v1=[])
class DNNLinearCombinedEstimatorV2(estimator.EstimatorV2):
  """An estimator for TensorFlow Linear and DNN joined models with custom head.

  Note: This estimator is also known as wide-n-deep.

  Example:

  ```python
  numeric_feature = numeric_column(...)
  categorical_feature_a = categorical_column_with_hash_bucket(...)
  categorical_feature_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
  categorical_feature_a_emb = embedding_column(
      categorical_column=categorical_feature_a, ...)
  categorical_feature_b_emb = embedding_column(
      categorical_column=categorical_feature_b, ...)

  estimator = tf.estimator.DNNLinearCombinedEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      # wide settings
      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
      linear_optimizer=tf.keras.optimizers.Ftrl(...),
      # deep settings
      dnn_feature_columns=[
          categorical_feature_a_emb, categorical_feature_b_emb,
          numeric_feature],
      dnn_hidden_units=[1000, 500, 100],
      dnn_optimizer=tf.keras.optimizers.Adagrad(...))

  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
  tf.compat.v1.train.ProximalAdagradOptimizer(
      learning_rate=0.1,
      l1_regularization_strength=0.001,
      l2_regularization_strength=0.001)
  # To apply learning rate decay, you can set dnn_optimizer to a callable:
  lambda: tf.keras.optimizers.Adam(
      learning_rate=tf.compat.v1.train.exponential_decay(
          learning_rate=0.1,
          global_step=tf.compat.v1.train.get_global_step(),
          decay_steps=10000,
          decay_rate=0.96))
  # It is the same for linear_optimizer.

  # Input builders
  def input_fn_train():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_eval():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_predict():
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train, steps=100)
  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have the following features, otherwise
  there will be a `KeyError`:

  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss and predicted output are determined by the specified head.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(self,
               head,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=tf.nn.relu,
               dnn_dropout=None,
               config=None,
               linear_sparse_combiner='sum'):
    """Initializes a DNNLinearCombinedEstimator instance.

    Args:
      head: A `Head` instance constructed with a method such as
        `tf.estimator.MultiLabelHead`.
      model_dir: Directory to save model parameters, graph, etc. This can also
        be used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      linear_feature_columns: An iterable containing all the feature columns
        used by the linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf.keras.optimizers.*` used to apply
        gradients to the linear part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        the FTRL optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used
        by the deep part of the model. All items in the set must be instances
        of classes derived from `FeatureColumn`.
      dnn_optimizer: An instance of `tf.keras.optimizers.*` used to apply
        gradients to the deep part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        the Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If None,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out a given
        coordinate.
      config: RunConfig object to configure the runtime settings.
      linear_sparse_combiner: A string specifying how to reduce the linear
        model if a categorical column is multivalent. One of "mean", "sqrtn",
        and "sum" -- these are effectively different ways to do example-level
        normalization, which can be useful for bag-of-words features. For more
        details, see `tf.feature_column.linear_model`.

    Raises:
      ValueError: If both linear_feature_columns and dnn_feature_columns are
        empty at the same time.
""" self._feature_columns = _validate_feature_columns( linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns) estimator._canned_estimator_api_gauge.get_cell('Estimator').set( 'DNNLinearCombined') # pylint: disable=protected-access def _model_fn(features, labels, mode, config): """Call the _dnn_linear_combined_model_fn.""" return _dnn_linear_combined_model_fn_v2( features=features, labels=labels, mode=mode, head=head, linear_feature_columns=linear_feature_columns, linear_optimizer=linear_optimizer, dnn_feature_columns=dnn_feature_columns, dnn_optimizer=dnn_optimizer, dnn_hidden_units=dnn_hidden_units, dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, config=config, linear_sparse_combiner=linear_sparse_combiner) super(DNNLinearCombinedEstimatorV2, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config) @estimator_export(v1=['estimator.DNNLinearCombinedEstimator']) # pylint: disable=missing-docstring class DNNLinearCombinedEstimator(estimator.Estimator): __doc__ = DNNLinearCombinedEstimatorV2.__doc__ def __init__(self, head, model_dir=None, linear_feature_columns=None, linear_optimizer='Ftrl', dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None, dnn_activation_fn=tf.nn.relu, dnn_dropout=None, input_layer_partitioner=None, config=None, linear_sparse_combiner='sum'): self._feature_columns = _validate_feature_columns( linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns) estimator._canned_estimator_api_gauge.get_cell('Estimator').set( 'DNNLinearCombined') # pylint: disable=protected-access def _model_fn(features, labels, mode, config): """Call the _dnn_linear_combined_model_fn.""" return _dnn_linear_combined_model_fn( features=features, labels=labels, mode=mode, head=head, linear_feature_columns=linear_feature_columns, linear_optimizer=linear_optimizer, dnn_feature_columns=dnn_feature_columns, dnn_optimizer=dnn_optimizer, dnn_hidden_units=dnn_hidden_units, dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, input_layer_partitioner=input_layer_partitioner, config=config, linear_sparse_combiner=linear_sparse_combiner) super(DNNLinearCombinedEstimator, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config) @estimator_export('estimator.DNNLinearCombinedRegressor', v1=[]) class DNNLinearCombinedRegressorV2(estimator.EstimatorV2): """An estimator for TensorFlow Linear and DNN joined models for regression. Note: This estimator is also known as wide-n-deep. Example: ```python numeric_feature = numeric_column(...) categorical_column_a = categorical_column_with_hash_bucket(...) categorical_column_b = categorical_column_with_hash_bucket(...) categorical_feature_a_x_categorical_feature_b = crossed_column(...) categorical_feature_a_emb = embedding_column( categorical_column=categorical_feature_a, ...) categorical_feature_b_emb = embedding_column( categorical_column=categorical_feature_b, ...) 

  estimator = tf.estimator.DNNLinearCombinedRegressor(
      # wide settings
      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
      linear_optimizer=tf.keras.optimizers.Ftrl(...),
      # deep settings
      dnn_feature_columns=[
          categorical_feature_a_emb, categorical_feature_b_emb,
          numeric_feature],
      dnn_hidden_units=[1000, 500, 100],
      dnn_optimizer=tf.keras.optimizers.Adagrad(...),
      # warm-start settings
      warm_start_from="/path/to/checkpoint/dir")

  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
  tf.compat.v1.train.ProximalAdagradOptimizer(
      learning_rate=0.1,
      l1_regularization_strength=0.001,
      l2_regularization_strength=0.001)
  # To apply learning rate decay, you can set dnn_optimizer to a callable:
  lambda: tf.keras.optimizers.Adam(
      learning_rate=tf.compat.v1.train.exponential_decay(
          learning_rate=0.1,
          global_step=tf.compat.v1.train.get_global_step(),
          decay_steps=10000,
          decay_rate=0.96))
  # It is the same for linear_optimizer.

  # Input builders
  def input_fn_train():
    # Returns tf.data.Dataset of (x, y) tuple where y represents the label
    # value.
    pass
  def input_fn_eval():
    # Returns tf.data.Dataset of (x, y) tuple where y represents the label
    # value.
    pass
  def input_fn_predict():
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train, steps=100)
  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have the following features, otherwise
  there will be a `KeyError`:

  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss is calculated by using mean squared error.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(self,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=tf.nn.relu,
               dnn_dropout=None,
               label_dimension=1,
               weight_column=None,
               config=None,
               warm_start_from=None,
               loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               batch_norm=False,
               linear_sparse_combiner='sum'):
    """Initializes a DNNLinearCombinedRegressor instance.

    Args:
      model_dir: Directory to save model parameters, graph, etc. This can also
        be used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      linear_feature_columns: An iterable containing all the feature columns
        used by the linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf.keras.optimizers.*` used to apply
        gradients to the linear part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        the FTRL optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used
        by the deep part of the model.
        All items in the set must be instances of classes derived from
        `FeatureColumn`.
      dnn_optimizer: An instance of `tf.keras.optimizers.*` used to apply
        gradients to the deep part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        the Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If None,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out a given
        coordinate.
      label_dimension: Number of regression targets per example. This is the
        size of the last dimension of the labels and logits `Tensor` objects
        (typically, these have shape `[batch_size, label_dimension]`).
      weight_column: A string or a `NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training.
        It will be multiplied by the loss of the example. If it is a string, it
        is used as a key to fetch the weight tensor from the `features`. If it
        is a `NumericColumn`, the raw tensor is fetched by key
        `weight_column.key`, then `weight_column.normalizer_fn` is applied on
        it to get the weight tensor.
      config: RunConfig object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting. If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and
        Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
      batch_norm: Whether to use batch normalization after each hidden layer.
      linear_sparse_combiner: A string specifying how to reduce the linear
        model if a categorical column is multivalent. One of "mean", "sqrtn",
        and "sum" -- these are effectively different ways to do example-level
        normalization, which can be useful for bag-of-words features. For more
        details, see `tf.feature_column.linear_model`.

    Raises:
      ValueError: If both linear_feature_columns and dnn_feature_columns are
        empty at the same time.
""" self._feature_columns = _validate_feature_columns( linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns) head = regression_head.RegressionHead( label_dimension=label_dimension, weight_column=weight_column, loss_reduction=loss_reduction) estimator._canned_estimator_api_gauge.get_cell('Regressor').set( 'DNNLinearCombined') # pylint: disable=protected-access def _model_fn(features, labels, mode, config): """Call the _dnn_linear_combined_model_fn.""" return _dnn_linear_combined_model_fn_v2( features=features, labels=labels, mode=mode, head=head, linear_feature_columns=linear_feature_columns, linear_optimizer=linear_optimizer, dnn_feature_columns=dnn_feature_columns, dnn_optimizer=dnn_optimizer, dnn_hidden_units=dnn_hidden_units, dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, config=config, batch_norm=batch_norm, linear_sparse_combiner=linear_sparse_combiner) super(DNNLinearCombinedRegressorV2, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config, warm_start_from=warm_start_from) @estimator_export(v1=['estimator.DNNLinearCombinedRegressor']) # pylint: disable=missing-docstring class DNNLinearCombinedRegressor(estimator.Estimator): __doc__ = DNNLinearCombinedRegressorV2.__doc__.replace( 'SUM_OVER_BATCH_SIZE', 'SUM') def __init__(self, model_dir=None, linear_feature_columns=None, linear_optimizer='Ftrl', dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None, dnn_activation_fn=tf.nn.relu, dnn_dropout=None, label_dimension=1, weight_column=None, input_layer_partitioner=None, config=None, warm_start_from=None, loss_reduction=tf.compat.v1.losses.Reduction.SUM, batch_norm=False, linear_sparse_combiner='sum'): self._feature_columns = _validate_feature_columns( linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns) estimator._canned_estimator_api_gauge.get_cell('Regressor').set( 'DNNLinearCombined') # pylint: disable=protected-access head = head_lib._regression_head( # pylint: disable=protected-access label_dimension=label_dimension, weight_column=weight_column, loss_reduction=loss_reduction) def _model_fn(features, labels, mode, config): """Call the _dnn_linear_combined_model_fn.""" return _dnn_linear_combined_model_fn( features=features, labels=labels, mode=mode, head=head, linear_feature_columns=linear_feature_columns, linear_optimizer=linear_optimizer, dnn_feature_columns=dnn_feature_columns, dnn_optimizer=dnn_optimizer, dnn_hidden_units=dnn_hidden_units, dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, input_layer_partitioner=input_layer_partitioner, config=config, batch_norm=batch_norm, linear_sparse_combiner=linear_sparse_combiner) super(DNNLinearCombinedRegressor, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config, warm_start_from=warm_start_from)