# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TensorFlow estimators for Linear and DNN joined training models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import six
import tensorflow as tf

from tensorflow.python.keras.utils import losses_utils
from tensorflow.python.util.tf_export import estimator_export
from tensorflow_estimator.python.estimator import estimator
from tensorflow_estimator.python.estimator.canned import dnn
from tensorflow_estimator.python.estimator.canned import head as head_lib
from tensorflow_estimator.python.estimator.canned import linear
from tensorflow_estimator.python.estimator.canned import optimizers
from tensorflow_estimator.python.estimator.head import head_utils
from tensorflow_estimator.python.estimator.head import regression_head
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys

# The default learning rates are a historical artifact of the initial
# implementation.
_DNN_LEARNING_RATE = 0.001
_LINEAR_LEARNING_RATE = 0.005


def _check_no_sync_replicas_optimizer(optimizer):
  if isinstance(optimizer, tf.compat.v1.train.SyncReplicasOptimizer):
    raise ValueError(
        'SyncReplicasOptimizer does not support multi optimizers case. '
        'Therefore, it is not supported in DNNLinearCombined model. '
        'If you want to use this optimizer, please use either DNN or Linear '
        'model.')


def _linear_learning_rate(num_linear_feature_columns):
  """Returns the default learning rate of the linear model.

  The calculation is a historical artifact of the initial implementation, but
  has proven a reasonable choice.

  Args:
    num_linear_feature_columns: The number of feature columns of the linear
      model.

  Returns:
    A float.
  """
  default_learning_rate = 1. / math.sqrt(num_linear_feature_columns)
  return min(_LINEAR_LEARNING_RATE, default_learning_rate)
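
# Added illustration (editorial comment, not used anywhere in this module):
# a linear model with 25 feature columns gets
# min(0.005, 1/sqrt(25)) = min(0.005, 0.2) = 0.005, while one with 100,000
# columns gets roughly 1/sqrt(100000) ~= 0.0032.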


def _add_layer_summary(value, tag):
  tf.compat.v1.summary.scalar('%s/fraction_of_zero_values' % tag,
                              tf.math.zero_fraction(value))
  tf.compat.v1.summary.histogram('%s/activation' % tag, value)


def _validate_feature_columns(linear_feature_columns, dnn_feature_columns):
  """Validates feature columns for DNNLinearCombined estimators."""
  linear_feature_columns = linear_feature_columns or []
  dnn_feature_columns = dnn_feature_columns or []
  feature_columns = (list(linear_feature_columns) + list(dnn_feature_columns))
  if not feature_columns:
    raise ValueError('Either linear_feature_columns or dnn_feature_columns '
                     'must be defined.')
  return feature_columns


def _dnn_linear_combined_model_fn_v2(
    features,
    labels,
    mode,
    head,
    linear_feature_columns=None,
    linear_optimizer='Ftrl',
    dnn_feature_columns=None,
    dnn_optimizer='Adagrad',
    dnn_hidden_units=None,
    dnn_activation_fn=tf.nn.relu,
    dnn_dropout=None,
    config=None,
    batch_norm=False,
    linear_sparse_combiner='sum',
    loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns used
      by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used by
      the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If
      `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given DNN
      coordinate.
    config: `RunConfig` object to configure the runtime settings.
    batch_norm: Whether to use batch normalization after each hidden layer.
    linear_sparse_combiner: A string specifying how to reduce the linear model
      if a categorical column is multivalent. One of "mean", "sqrtn", and
      "sum".
    loss_reduction: One of `tf.keras.losses.Reduction` except `NONE`. Describes
      how to reduce training loss over batch. Defaults to
      `SUM_OVER_BATCH_SIZE`.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns` are
      empty at the same time, or if `features` has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be '
        'defined.')
  del config
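
  # Added overview comment: the remainder of this function
  # (1) builds the DNN logits and, in TRAIN mode, a v2 (Keras-style) DNN
  #     optimizer,
  # (2) builds the linear logits and a linear optimizer the same way,
  # (3) sums whichever logits exist into the combined logits, and
  # (4) defines _train_op_fn, which applies each optimizer only to its own
  #     tower's trainable variables and groups the resulting update ops.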

  # Build DNN Logits.
  if not dnn_feature_columns:
    dnn_logits = None
  else:
    if mode == ModeKeys.TRAIN:
      dnn_optimizer = optimizers.get_optimizer_instance_v2(
          dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
      _check_no_sync_replicas_optimizer(dnn_optimizer)

    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')

    dnn_logits, dnn_trainable_variables, dnn_update_ops = (
        dnn._dnn_model_fn_builder_v2(  # pylint: disable=protected-access
            units=head.logits_dimension,
            hidden_units=dnn_hidden_units,
            feature_columns=dnn_feature_columns,
            activation_fn=dnn_activation_fn,
            dropout=dnn_dropout,
            batch_norm=batch_norm,
            features=features,
            mode=mode))

  if not linear_feature_columns:
    linear_logits = None
  else:
    if mode == ModeKeys.TRAIN:
      linear_optimizer = optimizers.get_optimizer_instance_v2(
          linear_optimizer,
          learning_rate=_linear_learning_rate(len(linear_feature_columns)))
      _check_no_sync_replicas_optimizer(linear_optimizer)

    linear_logits, linear_trainable_variables = (
        linear._linear_model_fn_builder_v2(  # pylint: disable=protected-access
            units=head.logits_dimension,
            feature_columns=linear_feature_columns,
            sparse_combiner=linear_sparse_combiner,
            features=features))
    _add_layer_summary(linear_logits, 'linear')

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    # Scale loss by number of replicas.
    if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
      loss = losses_utils.scale_loss_for_distribution(loss)

    if dnn_logits is not None:
      train_ops.extend(
          dnn_optimizer.get_updates(loss, dnn_trainable_variables))
      if dnn_update_ops is not None:
        train_ops.extend(dnn_update_ops)
    if linear_logits is not None:
      train_ops.extend(
          linear_optimizer.get_updates(loss, linear_trainable_variables))

    train_op = tf.group(*train_ops)
    return train_op

  # In TRAIN mode, assign the global_step variable to optimizer.iterations so
  # that global_step increments correctly, as hooks rely on the global step as
  # a step counter. Note that only one model's optimizer needs this
  # assignment.
  if mode == ModeKeys.TRAIN:
    if dnn_logits is not None:
      dnn_optimizer.iterations = tf.compat.v1.train.get_or_create_global_step()
    else:
      linear_optimizer.iterations = \
          tf.compat.v1.train.get_or_create_global_step()

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
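

# Added note: the v1 variant below implements the same wide-and-deep
# combination as _dnn_linear_combined_model_fn_v2 but targets graph-mode
# tf.compat.v1 optimizers: variables are created under partitioned variable
# scopes ('dnn' and 'linear'), each optimizer minimizes only the variables
# collected from its own scope, and the global step is incremented explicitly
# with an assign_add that depends on the grouped train ops.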


def _dnn_linear_combined_model_fn(features,
                                  labels,
                                  mode,
                                  head,
                                  linear_feature_columns=None,
                                  linear_optimizer='Ftrl',
                                  dnn_feature_columns=None,
                                  dnn_optimizer='Adagrad',
                                  dnn_hidden_units=None,
                                  dnn_activation_fn=tf.nn.relu,
                                  dnn_dropout=None,
                                  input_layer_partitioner=None,
                                  config=None,
                                  batch_norm=False,
                                  linear_sparse_combiner='sum'):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns used
      by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used by
      the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If
      `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given DNN
      coordinate.
    input_layer_partitioner: Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.
    batch_norm: Whether to use batch normalization after each hidden layer.
    linear_sparse_combiner: A string specifying how to reduce the linear model
      if a categorical column is multivalent. One of "mean", "sqrtn", and
      "sum".

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns` are
      empty at the same time, or `input_layer_partitioner` is missing, or
      `features` has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be '
        'defined.')

  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      tf.compat.v1.min_max_variable_partitioner(
          max_partitions=num_ps_replicas, min_slice_size=64 << 20))

  # Build DNN Logits.
  dnn_parent_scope = 'dnn'

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = (
        tf.compat.v1.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with tf.compat.v1.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner) as scope:
      dnn_absolute_scope = scope.name
      dnn_logit_fn = dnn.dnn_logit_fn_builder(
          units=head.logits_dimension,
          hidden_units=dnn_hidden_units,
          feature_columns=dnn_feature_columns,
          activation_fn=dnn_activation_fn,
          dropout=dnn_dropout,
          batch_norm=batch_norm,
          input_layer_partitioner=input_layer_partitioner)
      dnn_logits = dnn_logit_fn(features=features, mode=mode)

  linear_parent_scope = 'linear'

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with tf.compat.v1.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      linear_absolute_scope = scope.name
      logit_fn = linear.linear_logit_fn_builder(
          units=head.logits_dimension,
          feature_columns=linear_feature_columns,
          sparse_combiner=linear_sparse_combiner)
      linear_logits = logit_fn(features=features)
      _add_layer_summary(linear_logits, scope.name)
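
  # Added note on `sparse_combiner` (illustrative, following the semantics of
  # tf.feature_column.linear_model): if an example activates three ids in one
  # multivalent categorical column, "sum" adds the three learned weights,
  # "mean" divides that sum by 3, and "sqrtn" divides it by sqrt(3).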

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    global_step = tf.compat.v1.train.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          dnn_optimizer.minimize(
              loss,
              var_list=tf.compat.v1.get_collection(
                  tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES,
                  scope=dnn_absolute_scope)))
    if linear_logits is not None:
      train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=tf.compat.v1.get_collection(
                  tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_absolute_scope)))

    train_op = tf.group(*train_ops)
    with tf.control_dependencies([train_op]):
      return tf.compat.v1.assign_add(global_step, 1).op

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)


@estimator_export('estimator.DNNLinearCombinedClassifier', v1=[])
class DNNLinearCombinedClassifierV2(estimator.EstimatorV2):
  """An estimator for TensorFlow Linear and DNN joined classification models.

  Note: This estimator is also known as wide-n-deep.

  Example:

  ```python
  numeric_feature = numeric_column(...)
  categorical_feature_a = categorical_column_with_hash_bucket(...)
  categorical_feature_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
  categorical_feature_a_emb = embedding_column(
      categorical_column=categorical_feature_a, ...)
  categorical_feature_b_emb = embedding_column(
      categorical_column=categorical_feature_b, ...)

  estimator = tf.estimator.DNNLinearCombinedClassifier(
      # wide settings
      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
      linear_optimizer=tf.keras.optimizers.Ftrl(...),
      # deep settings
      dnn_feature_columns=[
          categorical_feature_a_emb, categorical_feature_b_emb,
          numeric_feature],
      dnn_hidden_units=[1000, 500, 100],
      dnn_optimizer=tf.keras.optimizers.Adagrad(...),
      # warm-start settings
      warm_start_from="/path/to/checkpoint/dir")

  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
  tf.compat.v1.train.ProximalAdagradOptimizer(
      learning_rate=0.1,
      l1_regularization_strength=0.001,
      l2_regularization_strength=0.001)
  # To apply learning rate decay, you can set dnn_optimizer to a callable:
  lambda: tf.keras.optimizers.Adam(
      learning_rate=tf.compat.v1.train.exponential_decay(
          learning_rate=0.1,
          global_step=tf.compat.v1.train.get_global_step(),
          decay_steps=10000,
          decay_rate=0.96))
  # It is the same for linear_optimizer.

  # Input builders
  def input_fn_train():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_eval():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_predict():
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train, steps=100)
  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have the following features, otherwise
  there will be a `KeyError`:

  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss is calculated by using softmax cross entropy.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(self,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=tf.nn.relu,
               dnn_dropout=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               config=None,
               warm_start_from=None,
               loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               batch_norm=False,
               linear_sparse_combiner='sum'):
    """Initializes a DNNLinearCombinedClassifier instance.

    Args:
      model_dir: Directory to save model parameters, graph, etc. This can also
        be used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      linear_feature_columns: An iterable containing all the feature columns
        used by the linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf.keras.optimizers.*` used to apply
        gradients to the linear part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        the FTRL optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used
        by the deep part of the model. All items in the set must be instances
        of classes derived from `FeatureColumn`.
      dnn_optimizer: An instance of `tf.keras.optimizers.*` used to apply
        gradients to the deep part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        the Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If None,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out a given
        coordinate.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training.
        It will be multiplied by the loss of the example. If it is a string, it
        is used as a key to fetch the weight tensor from the `features`. If it
        is a `NumericColumn`, the raw tensor is fetched by key
        `weight_column.key`, then `weight_column.normalizer_fn` is applied on
        it to get the weight tensor.
      label_vocabulary: A list of strings representing possible label values.
        If given, labels must be of string type and have any value in
        `label_vocabulary`. If it is not given, that means labels are already
        encoded as an integer or float within [0, 1] for `n_classes=2` and
        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2.
        An error will also be raised if the vocabulary is not provided and the
        labels are strings.
      config: RunConfig object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting. If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and
        Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
      batch_norm: Whether to use batch normalization after each hidden layer.
      linear_sparse_combiner: A string specifying how to reduce the linear
        model if a categorical column is multivalent. One of "mean", "sqrtn",
        and "sum" -- these are effectively different ways to do example-level
        normalization, which can be useful for bag-of-words features. For more
        details, see `tf.feature_column.linear_model`.

    Raises:
      ValueError: If both linear_feature_columns and dnn_feature_columns are
        empty at the same time.
    """
    self._feature_columns = _validate_feature_columns(
        linear_feature_columns=linear_feature_columns,
        dnn_feature_columns=dnn_feature_columns)

    head = head_utils.binary_or_multi_class_head(
        n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Classifier').set(  # pylint: disable=protected-access
        'DNNLinearCombined')

    def _model_fn(features, labels, mode, config):
      """Call the _dnn_linear_combined_model_fn."""
      return _dnn_linear_combined_model_fn_v2(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          linear_feature_columns=linear_feature_columns,
          linear_optimizer=linear_optimizer,
          dnn_feature_columns=dnn_feature_columns,
          dnn_optimizer=dnn_optimizer,
          dnn_hidden_units=dnn_hidden_units,
          dnn_activation_fn=dnn_activation_fn,
          dnn_dropout=dnn_dropout,
          config=config,
          batch_norm=batch_norm,
          linear_sparse_combiner=linear_sparse_combiner,
          loss_reduction=loss_reduction)

    super(DNNLinearCombinedClassifierV2, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@estimator_export(v1=['estimator.DNNLinearCombinedClassifier'])  # pylint: disable=missing-docstring
class DNNLinearCombinedClassifier(estimator.Estimator):
  __doc__ = DNNLinearCombinedClassifierV2.__doc__.replace(
      'SUM_OVER_BATCH_SIZE', 'SUM')

  def __init__(self,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=tf.nn.relu,
               dnn_dropout=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               input_layer_partitioner=None,
               config=None,
               warm_start_from=None,
               loss_reduction=tf.compat.v1.losses.Reduction.SUM,
               batch_norm=False,
               linear_sparse_combiner='sum'):
    self._feature_columns = _validate_feature_columns(
        linear_feature_columns=linear_feature_columns,
        dnn_feature_columns=dnn_feature_columns)

    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes, weight_column, label_vocabulary, loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Classifier').set(
        'DNNLinearCombined')  # pylint: disable=protected-access

    def _model_fn(features, labels, mode, config):
      """Call the _dnn_linear_combined_model_fn."""
      return _dnn_linear_combined_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          linear_feature_columns=linear_feature_columns,
          linear_optimizer=linear_optimizer,
          dnn_feature_columns=dnn_feature_columns,
          dnn_optimizer=dnn_optimizer,
          dnn_hidden_units=dnn_hidden_units,
          dnn_activation_fn=dnn_activation_fn,
          dnn_dropout=dnn_dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config,
          batch_norm=batch_norm,
          linear_sparse_combiner=linear_sparse_combiner)

    super(DNNLinearCombinedClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


def _init_dnn_linear_combined_estimator(head, linear_feature_columns,
                                        linear_optimizer, dnn_feature_columns,
                                        dnn_optimizer, dnn_hidden_units,
                                        dnn_activation_fn, dnn_dropout,
                                        input_layer_partitioner,
                                        linear_sparse_combiner):
  """Helper function for the initialization of DNNLinearCombinedEstimator."""
  linear_feature_columns = linear_feature_columns or []
  dnn_feature_columns = dnn_feature_columns or []
  feature_columns = (list(linear_feature_columns) + list(dnn_feature_columns))
  if not feature_columns:
    raise ValueError('Either linear_feature_columns or dnn_feature_columns '
                     'must be defined.')

  def _model_fn(features, labels, mode, config):
    """Call the _dnn_linear_combined_model_fn."""
    return _dnn_linear_combined_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        linear_feature_columns=linear_feature_columns,
        linear_optimizer=linear_optimizer,
        dnn_feature_columns=dnn_feature_columns,
        dnn_optimizer=dnn_optimizer,
        dnn_hidden_units=dnn_hidden_units,
        dnn_activation_fn=dnn_activation_fn,
        dnn_dropout=dnn_dropout,
        input_layer_partitioner=input_layer_partitioner,
        config=config,
        linear_sparse_combiner=linear_sparse_combiner)

  return feature_columns, _model_fn


@estimator_export('estimator.DNNLinearCombinedEstimator', v1=[])
class DNNLinearCombinedEstimatorV2(estimator.EstimatorV2):
  """An estimator for TensorFlow Linear and DNN joined models with custom head.

  Note: This estimator is also known as wide-n-deep.

  Example:

  ```python
  numeric_feature = numeric_column(...)
  categorical_feature_a = categorical_column_with_hash_bucket(...)
  categorical_feature_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
  categorical_feature_a_emb = embedding_column(
      categorical_column=categorical_feature_a, ...)
  categorical_feature_b_emb = embedding_column(
      categorical_column=categorical_feature_b, ...)

  estimator = tf.estimator.DNNLinearCombinedEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      # wide settings
      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
      linear_optimizer=tf.keras.optimizers.Ftrl(...),
      # deep settings
      dnn_feature_columns=[
          categorical_feature_a_emb, categorical_feature_b_emb,
          numeric_feature],
      dnn_hidden_units=[1000, 500, 100],
      dnn_optimizer=tf.keras.optimizers.Adagrad(...))

  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
  tf.compat.v1.train.ProximalAdagradOptimizer(
      learning_rate=0.1,
      l1_regularization_strength=0.001,
      l2_regularization_strength=0.001)
  # To apply learning rate decay, you can set dnn_optimizer to a callable:
  lambda: tf.keras.optimizers.Adam(
      learning_rate=tf.compat.v1.train.exponential_decay(
          learning_rate=0.1,
          global_step=tf.compat.v1.train.get_global_step(),
          decay_steps=10000,
          decay_rate=0.96))
  # It is the same for linear_optimizer.

  # Input builders
  def input_fn_train():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_eval():
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_predict():
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train, steps=100)
  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have the following features, otherwise
  there will be a `KeyError`:

  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss and predicted output are determined by the specified head.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(self,
               head,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=tf.nn.relu,
               dnn_dropout=None,
               config=None,
               linear_sparse_combiner='sum'):
    """Initializes a DNNLinearCombinedEstimator instance.

    Args:
      head: A `Head` instance constructed with a method such as
        `tf.estimator.MultiLabelHead`.
      model_dir: Directory to save model parameters, graph, etc. This can also
        be used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      linear_feature_columns: An iterable containing all the feature columns
        used by the linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf.keras.optimizers.*` used to apply
        gradients to the linear part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        the FTRL optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used
        by the deep part of the model. All items in the set must be instances
        of classes derived from `FeatureColumn`.
      dnn_optimizer: An instance of `tf.keras.optimizers.*` used to apply
        gradients to the deep part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        the Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If None,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out a given
        coordinate.
      config: RunConfig object to configure the runtime settings.
      linear_sparse_combiner: A string specifying how to reduce the linear
        model if a categorical column is multivalent. One of "mean", "sqrtn",
        and "sum" -- these are effectively different ways to do example-level
        normalization, which can be useful for bag-of-words features. For more
        details, see `tf.feature_column.linear_model`.

    Raises:
      ValueError: If both linear_feature_columns and dnn_feature_columns are
        empty at the same time.
""" self._feature_columns = _validate_feature_columns( linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns) estimator._canned_estimator_api_gauge.get_cell('Estimator').set( 'DNNLinearCombined') # pylint: disable=protected-access def _model_fn(features, labels, mode, config): """Call the _dnn_linear_combined_model_fn.""" return _dnn_linear_combined_model_fn_v2( features=features, labels=labels, mode=mode, head=head, linear_feature_columns=linear_feature_columns, linear_optimizer=linear_optimizer, dnn_feature_columns=dnn_feature_columns, dnn_optimizer=dnn_optimizer, dnn_hidden_units=dnn_hidden_units, dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, config=config, linear_sparse_combiner=linear_sparse_combiner) super(DNNLinearCombinedEstimatorV2, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config) @estimator_export(v1=['estimator.DNNLinearCombinedEstimator']) # pylint: disable=missing-docstring class DNNLinearCombinedEstimator(estimator.Estimator): __doc__ = DNNLinearCombinedEstimatorV2.__doc__ def __init__(self, head, model_dir=None, linear_feature_columns=None, linear_optimizer='Ftrl', dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None, dnn_activation_fn=tf.nn.relu, dnn_dropout=None, input_layer_partitioner=None, config=None, linear_sparse_combiner='sum'): self._feature_columns = _validate_feature_columns( linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns) estimator._canned_estimator_api_gauge.get_cell('Estimator').set( 'DNNLinearCombined') # pylint: disable=protected-access def _model_fn(features, labels, mode, config): """Call the _dnn_linear_combined_model_fn.""" return _dnn_linear_combined_model_fn( features=features, labels=labels, mode=mode, head=head, linear_feature_columns=linear_feature_columns, linear_optimizer=linear_optimizer, dnn_feature_columns=dnn_feature_columns, dnn_optimizer=dnn_optimizer, dnn_hidden_units=dnn_hidden_units, dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, input_layer_partitioner=input_layer_partitioner, config=config, linear_sparse_combiner=linear_sparse_combiner) super(DNNLinearCombinedEstimator, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config) @estimator_export('estimator.DNNLinearCombinedRegressor', v1=[]) class DNNLinearCombinedRegressorV2(estimator.EstimatorV2): """An estimator for TensorFlow Linear and DNN joined models for regression. Note: This estimator is also known as wide-n-deep. Example: ```python numeric_feature = numeric_column(...) categorical_column_a = categorical_column_with_hash_bucket(...) categorical_column_b = categorical_column_with_hash_bucket(...) categorical_feature_a_x_categorical_feature_b = crossed_column(...) categorical_feature_a_emb = embedding_column( categorical_column=categorical_feature_a, ...) categorical_feature_b_emb = embedding_column( categorical_column=categorical_feature_b, ...) 

  estimator = tf.estimator.DNNLinearCombinedRegressor(
      # wide settings
      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
      linear_optimizer=tf.keras.optimizers.Ftrl(...),
      # deep settings
      dnn_feature_columns=[
          categorical_feature_a_emb, categorical_feature_b_emb,
          numeric_feature],
      dnn_hidden_units=[1000, 500, 100],
      dnn_optimizer=tf.keras.optimizers.Adagrad(...),
      # warm-start settings
      warm_start_from="/path/to/checkpoint/dir")

  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
  tf.compat.v1.train.ProximalAdagradOptimizer(
      learning_rate=0.1,
      l1_regularization_strength=0.001,
      l2_regularization_strength=0.001)
  # To apply learning rate decay, you can set dnn_optimizer to a callable:
  lambda: tf.keras.optimizers.Adam(
      learning_rate=tf.compat.v1.train.exponential_decay(
          learning_rate=0.1,
          global_step=tf.compat.v1.train.get_global_step(),
          decay_steps=10000,
          decay_rate=0.96))
  # It is the same for linear_optimizer.

  # Input builders
  def input_fn_train():
    # Returns tf.data.Dataset of (x, y) tuple where y represents the label
    # value.
    pass
  def input_fn_eval():
    # Returns tf.data.Dataset of (x, y) tuple where y represents the label
    # value.
    pass
  def input_fn_predict():
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train, steps=100)
  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have the following features, otherwise
  there will be a `KeyError`:

  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss is calculated by using mean squared error.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(self,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=tf.nn.relu,
               dnn_dropout=None,
               label_dimension=1,
               weight_column=None,
               config=None,
               warm_start_from=None,
               loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               batch_norm=False,
               linear_sparse_combiner='sum'):
    """Initializes a DNNLinearCombinedRegressor instance.

    Args:
      model_dir: Directory to save model parameters, graph, etc. This can also
        be used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      linear_feature_columns: An iterable containing all the feature columns
        used by the linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf.keras.optimizers.*` used to apply
        gradients to the linear part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        the FTRL optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used
        by the deep part of the model.
        All items in the set must be instances of classes derived from
        `FeatureColumn`.
      dnn_optimizer: An instance of `tf.keras.optimizers.*` used to apply
        gradients to the deep part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        the Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If None,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out a given
        coordinate.
      label_dimension: Number of regression targets per example. This is the
        size of the last dimension of the labels and logits `Tensor` objects
        (typically, these have shape `[batch_size, label_dimension]`).
      weight_column: A string or a `NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training.
        It will be multiplied by the loss of the example. If it is a string, it
        is used as a key to fetch the weight tensor from the `features`. If it
        is a `NumericColumn`, the raw tensor is fetched by key
        `weight_column.key`, then `weight_column.normalizer_fn` is applied on
        it to get the weight tensor.
      config: RunConfig object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting. If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and
        Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
      batch_norm: Whether to use batch normalization after each hidden layer.
      linear_sparse_combiner: A string specifying how to reduce the linear
        model if a categorical column is multivalent. One of "mean", "sqrtn",
        and "sum" -- these are effectively different ways to do example-level
        normalization, which can be useful for bag-of-words features. For more
        details, see `tf.feature_column.linear_model`.

    Raises:
      ValueError: If both linear_feature_columns and dnn_feature_columns are
        empty at the same time.
""" self._feature_columns = _validate_feature_columns( linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns) head = regression_head.RegressionHead( label_dimension=label_dimension, weight_column=weight_column, loss_reduction=loss_reduction) estimator._canned_estimator_api_gauge.get_cell('Regressor').set( 'DNNLinearCombined') # pylint: disable=protected-access def _model_fn(features, labels, mode, config): """Call the _dnn_linear_combined_model_fn.""" return _dnn_linear_combined_model_fn_v2( features=features, labels=labels, mode=mode, head=head, linear_feature_columns=linear_feature_columns, linear_optimizer=linear_optimizer, dnn_feature_columns=dnn_feature_columns, dnn_optimizer=dnn_optimizer, dnn_hidden_units=dnn_hidden_units, dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, config=config, batch_norm=batch_norm, linear_sparse_combiner=linear_sparse_combiner) super(DNNLinearCombinedRegressorV2, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config, warm_start_from=warm_start_from) @estimator_export(v1=['estimator.DNNLinearCombinedRegressor']) # pylint: disable=missing-docstring class DNNLinearCombinedRegressor(estimator.Estimator): __doc__ = DNNLinearCombinedRegressorV2.__doc__.replace( 'SUM_OVER_BATCH_SIZE', 'SUM') def __init__(self, model_dir=None, linear_feature_columns=None, linear_optimizer='Ftrl', dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None, dnn_activation_fn=tf.nn.relu, dnn_dropout=None, label_dimension=1, weight_column=None, input_layer_partitioner=None, config=None, warm_start_from=None, loss_reduction=tf.compat.v1.losses.Reduction.SUM, batch_norm=False, linear_sparse_combiner='sum'): self._feature_columns = _validate_feature_columns( linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns) estimator._canned_estimator_api_gauge.get_cell('Regressor').set( 'DNNLinearCombined') # pylint: disable=protected-access head = head_lib._regression_head( # pylint: disable=protected-access label_dimension=label_dimension, weight_column=weight_column, loss_reduction=loss_reduction) def _model_fn(features, labels, mode, config): """Call the _dnn_linear_combined_model_fn.""" return _dnn_linear_combined_model_fn( features=features, labels=labels, mode=mode, head=head, linear_feature_columns=linear_feature_columns, linear_optimizer=linear_optimizer, dnn_feature_columns=dnn_feature_columns, dnn_optimizer=dnn_optimizer, dnn_hidden_units=dnn_hidden_units, dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, input_layer_partitioner=input_layer_partitioner, config=config, batch_norm=batch_norm, linear_sparse_combiner=linear_sparse_combiner) super(DNNLinearCombinedRegressor, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config, warm_start_from=warm_start_from)