# EVOLUTION-MANAGER
#
# Edit File: debug_mnist_v2.py

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Demo of the tfdbg curses CLI: Locating the source of bad numerical values with TF v2.

This demo contains a classical example of a neural network for the mnist
dataset, but modifications are made so that problematic numerical values (infs
and nans) appear in nodes of the graph during training.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys

import absl
import tensorflow.compat.v2 as tf

# Model dimensions used by `main`: the hidden layer takes IMAGE_SIZE**2 inputs
# and has HIDDEN_SIZE units; the output layer has NUM_LABELS units.
IMAGE_SIZE = 28
HIDDEN_SIZE = 500
NUM_LABELS = 10

# If we set the weights randomly, the model will converge normally about half
# the time. We need a seed to ensure that the bad numerical values issue
# appears. The seed is passed to both the training-set shuffle and the kernel
# initializer in `main`.
RAND_SEED = 42

# Module-level side effect: runs at import time, before any model code below.
tf.compat.v1.enable_v2_behavior()

# Parsed command-line flags. Assigned in the `__main__` block from
# `parse_args()` and read by `main`; stays None if this module is only imported.
FLAGS = None

def parse_args():
  """Builds the demo's argument parser and parses the process command line.

  Returns:
    A `(namespace, leftovers)` pair as returned by
    `ArgumentParser.parse_known_args()`: `namespace` holds the values of the
    flags declared here, and `leftovers` is the list of argument strings that
    did not match any declared flag.
  """

  def _str_to_bool(text):
    # Only the case-insensitive literal "true" is treated as True.
    return text.lower() == "true"

  # Settings common to every boolean switch: a bare `--flag` yields True, an
  # absent flag yields False, and `--flag VALUE` is converted by the "bool"
  # type registered on the parser below.
  switch = dict(type="bool", nargs="?", const=True, default=False)

  flag_specs = (
      ("--max_steps",
       dict(type=int, default=10, help="Number of steps to run trainer.")),
      ("--train_batch_size",
       dict(type=int, default=100, help="Batch size used during training.")),
      ("--learning_rate",
       dict(type=float, default=0.025, help="Initial learning rate.")),
      ("--data_dir",
       dict(type=str,
            default="/tmp/mnist_data",
            help="Directory for storing data")),
      ("--fake_data",
       dict(switch, help="Use fake MNIST data for unit testing")),
      ("--check_numerics",
       dict(switch,
            help="Use tfdbg to track down bad values during training. "
            "Mutually exclusive with the --dump_dir flag.")),
      ("--dump_dir",
       dict(type=str,
            default=None,
            help="Dump TensorFlow program debug data to the specified "
            "directory. "
            "The dumped data contains information regarding tf.function "
            "building, "
            "execution of ops and tf.functions, as well as their stack "
            "traces and "
            "associated source-code snapshots. "
            "Mutually exclusive with the --check_numerics flag.")),
      # TODO(cais): Add more tensor debug mode strings once they are supported.
      ("--dump_tensor_debug_mode",
       dict(type=str,
            default="FULL_HEALTH",
            help="Mode for dumping tensor values. Options: NO_TENSOR, "
            "CURT_HEALTH, "
            "CONCISE_HEALTH, SHAPE, FULL_HEALTH. This is relevant only when "
            "--dump_dir is set.")),
      ("--dump_circular_buffer_size",
       dict(type=int,
            default=-1,
            help="Size of the circular buffer used to dump execution events. "
            "A value <= 0 disables the circular-buffer behavior and causes "
            "all instrumented tensor values to be dumped. "
            "This is relevant only when --dump_dir is set.")),
      ("--use_random_config_path",
       dict(switch,
            help="If set, set config file path to a random file in the "
            "temporary\n      directory.")),
  )

  arg_parser = argparse.ArgumentParser()
  arg_parser.register("type", "bool", _str_to_bool)
  for flag_name, options in flag_specs:
    arg_parser.add_argument(flag_name, **options)
  return arg_parser.parse_known_args()

def main(_):
  """Trains a two-layer dense MNIST classifier, optionally under tfdbg.

  Configuration is read from the module-level `FLAGS` namespace. Depending on
  the flags, either numerics checking or debug-info dumping is enabled before
  the model is built. The model is then trained for `FLAGS.max_steps` steps,
  and the accuracy on the test set is printed after every step.

  Args:
    _: Unused; receives whatever argument the caller passes to `main`.

  Raises:
    ValueError: If both --check_numerics and --dump_dir are set.
  """
  if FLAGS.check_numerics and FLAGS.dump_dir:
    raise ValueError(
        "The --check_numerics and --dump_dir flags are mutually "
        "exclusive.")
  if FLAGS.check_numerics:
    tf.debugging.enable_check_numerics()
  elif FLAGS.dump_dir:
    tf.debugging.experimental.enable_dump_debug_info(
        FLAGS.dump_dir,
        tensor_debug_mode=FLAGS.dump_tensor_debug_mode,
        circular_buffer_size=FLAGS.dump_circular_buffer_size)

  # Import data
  if FLAGS.fake_data:
    # Random integer "images" and labels; the same tensors serve as both the
    # training and the test split.
    imgs = tf.random.uniform(
        maxval=256, shape=(1000, IMAGE_SIZE, IMAGE_SIZE), dtype=tf.int32)
    labels = tf.random.uniform(
        maxval=NUM_LABELS, shape=(1000,), dtype=tf.int32)
    mnist_train = imgs, labels
    mnist_test = imgs, labels
  else:
    mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()

  @tf.function
  def format_example(imgs, labels):
    """Formats each training and test example to work with our model."""
    # Flatten each image to a vector and rescale pixel values by 1/255.
    imgs = tf.reshape(imgs, [-1, IMAGE_SIZE * IMAGE_SIZE])
    imgs = tf.cast(imgs, tf.float32) / 255.0
    labels = tf.one_hot(labels, depth=NUM_LABELS, dtype=tf.float32)
    return imgs, labels

  # The shuffle buffer covers as many examples as the whole run consumes.
  # NOTE(review): the training set is not repeated, so `next(train_batches)`
  # below is exhausted if max_steps exceeds the number of available batches.
  train_ds = tf.data.Dataset.from_tensor_slices(mnist_train).shuffle(
      FLAGS.train_batch_size * FLAGS.max_steps,
      seed=RAND_SEED).batch(FLAGS.train_batch_size)
  train_ds = train_ds.map(format_example)

  # Every test batch is the entire test split; `repeat()` lets it be drawn
  # once per training step.
  test_ds = tf.data.Dataset.from_tensor_slices(mnist_test).repeat().batch(
      len(mnist_test[0]))
  test_ds = test_ds.map(format_example)

  def get_dense_weights(input_dim, output_dim):
    """Initializes the parameters for a single dense layer.

    Args:
      input_dim: Number of input features of the layer.
      output_dim: Number of units of the layer.

    Returns:
      A `(kernel, bias)` tuple of `tf.Variable`s.
    """
    initial_kernel = tf.keras.initializers.TruncatedNormal(
        mean=0.0, stddev=0.1, seed=RAND_SEED)
    kernel = tf.Variable(initial_kernel([input_dim, output_dim]))
    bias = tf.Variable(tf.constant(0.1, shape=[output_dim]))

    return kernel, bias

  @tf.function
  def dense_layer(weights, input_tensor, act=tf.nn.relu):
    """Runs the forward computation for a single dense layer.

    Args:
      weights: A `(kernel, bias)` tuple as returned by `get_dense_weights`.
      input_tensor: The input to the layer.
      act: Activation function applied to the pre-activation values.

    Returns:
      The activations of the layer.
    """
    kernel, bias = weights
    preactivate = tf.matmul(input_tensor, kernel) + bias

    activations = act(preactivate)
    return activations

  # init model
  hidden_weights = get_dense_weights(IMAGE_SIZE**2, HIDDEN_SIZE)
  output_weights = get_dense_weights(HIDDEN_SIZE, NUM_LABELS)
  # Tuple concatenation: (hidden kernel, hidden bias, output kernel, output
  # bias).
  variables = hidden_weights + output_weights

  @tf.function
  def model(x):
    """Feed forward function of the model.

    Args:
      x: a (?, 28*28) tensor consisting of the feature inputs for a batch of
        examples.

    Returns:
      A (?, 10) tensor containing the class scores for each example.
    """
    hidden_act = dense_layer(hidden_weights, x)
    logits_act = dense_layer(output_weights, hidden_act, tf.identity)
    y = tf.nn.softmax(logits_act)
    return y

  @tf.function
  def loss(probs, labels):
    """Calculates cross entropy loss.

    Args:
      probs: Class probabilities predicted by the model. The shape is expected
        to be (?, 10).
      labels: Truth labels for the classes, as one-hot encoded vectors. The
        shape is expected to be the same as `probs`.

    Returns:
      A scalar loss tensor.
    """
    # NOTE: no clipping or epsilon is applied before the log. Per the module
    # docstring this demo is meant to produce infs/nans during training, so
    # this is presumably deliberate -- do not "stabilize" it.
    diff = -labels * tf.math.log(probs)
    return tf.reduce_mean(diff)

  train_batches = iter(train_ds)
  test_batches = iter(test_ds)
  optimizer = tf.optimizers.Adam(learning_rate=FLAGS.learning_rate)
  for i in range(FLAGS.max_steps):
    x_train, y_train = next(train_batches)
    x_test, y_test = next(test_batches)

    # Train Step
    with tf.GradientTape() as tape:
      y = model(x_train)
      loss_val = loss(y, y_train)
    grads = tape.gradient(loss_val, variables)

    optimizer.apply_gradients(zip(grads, variables))

    # Evaluation Step
    y = model(x_test)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_test, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy at step %d: %s" % (i, accuracy.numpy()))

if __name__ == "__main__":
  # Importing the `absl` package alone does not load its `app` submodule, so
  # import it explicitly here instead of relying on another library having
  # loaded it as a side effect.
  import absl.app  # pylint: disable=g-import-not-at-top

  # Flags known to this script go into FLAGS; everything else is forwarded to
  # absl together with the program name.
  FLAGS, unparsed = parse_args()
  absl.app.run(main=main, argv=[sys.argv[0]] + unparsed)