// EVOLUTION-MANAGER
//
// Edit File: audio_ops.h

// This file is MACHINE GENERATED! Do not edit.

#ifndef TENSORFLOW_CC_OPS_AUDIO_OPS_H_
#define TENSORFLOW_CC_OPS_AUDIO_OPS_H_

// This file is MACHINE GENERATED! Do not edit.

#include "tensorflow/cc/framework/ops.h"
#include "tensorflow/cc/framework/scope.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/gtl/array_slice.h"

namespace tensorflow {
namespace ops {

/// @defgroup audio_ops Audio Ops
/// @{

/// Produces a visualization of audio data over time.
///
/// Spectrograms are a standard way of representing audio information as a series of
/// slices of frequency information, one slice for each window of time. By joining
/// these together into a sequence, they form a distinctive fingerprint of the sound
/// over time.
///
/// This op expects to receive audio data as an input, stored as floats in the range
/// -1 to 1, together with a window width in samples, and a stride specifying how
/// far to move the window between slices. From this it generates a three
/// dimensional output. The first dimension is for the channels in the input, so a
/// stereo audio input would have two here for example. The second dimension is time,
/// with successive frequency slices. The third dimension has an amplitude value for
/// each frequency during that time slice.
///
/// This means the layout when converted and saved as an image is rotated 90 degrees
/// clockwise from a typical spectrogram. Time is descending down the Y axis, and
/// the frequency decreases from left to right.
///
/// Each value in the result represents the square root of the sum of the squares
/// of the real and imaginary parts of an FFT on the current window of samples
/// (that is, the magnitude). In this way, the lowest dimension represents the
/// power of each frequency in the current window, and adjacent windows are
/// concatenated in the next dimension.
///
/// To get a more intuitive and visual look at what this operation does, you can run
/// tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the
/// resulting spectrogram as a PNG image.
///
/// Arguments:
/// * scope: A Scope object
/// * input: Float representation of audio data.
/// * window_size: How wide the input window is in samples. For the highest efficiency
/// this should be a power of two, but other values are accepted.
/// * stride: How far apart the centers of adjacent sample windows should be.
///
/// Optional attributes (see `Attrs`):
/// * magnitude_squared: Whether to return the squared magnitude or just the
/// magnitude. Using squared magnitude can avoid extra calculations.
///
/// Returns:
/// * `Output`: 3D representation of the audio frequencies as an image.
class AudioSpectrogram {
 public:
  /// Holder for the optional attributes accepted by AudioSpectrogram.
  ///
  /// Each setter leaves the receiver untouched and hands back a modified
  /// copy, so calls can be chained.
  struct Attrs {
    /// Selects between the squared magnitude (true) and the plain magnitude
    /// (false). The squared form can avoid extra calculations.
    ///
    /// Defaults to false
    TF_MUST_USE_RESULT Attrs MagnitudeSquared(bool x) {
      Attrs updated(*this);
      updated.magnitude_squared_ = x;
      return updated;
    }

    bool magnitude_squared_ = false;
  };

  /// Overload that takes no optional attributes.
  AudioSpectrogram(const ::tensorflow::Scope& scope,
                   ::tensorflow::Input input,
                   int64 window_size,
                   int64 stride);

  /// Overload that additionally takes optional attributes through `attrs`.
  AudioSpectrogram(const ::tensorflow::Scope& scope,
                   ::tensorflow::Input input,
                   int64 window_size,
                   int64 stride,
                   const AudioSpectrogram::Attrs& attrs);

  /// Implicit conversions that expose the `spectrogram` output.
  operator ::tensorflow::Output() const {
    return spectrogram;
  }
  operator ::tensorflow::Input() const {
    return spectrogram;
  }

  /// Node that backs the `spectrogram` output.
  ::tensorflow::Node* node() const {
    return spectrogram.node();
  }

  /// Shorthand: default-constructed Attrs with magnitude_squared set to `x`.
  static Attrs MagnitudeSquared(bool x) {
    Attrs defaults;
    return defaults.MagnitudeSquared(x);
  }

  Operation operation;
  ::tensorflow::Output spectrogram;
};

/// Decode a 16-bit PCM WAV file to a float tensor.
///
/// The -32768 to 32767 signed 16-bit values will be scaled to -1.0 to 1.0 in float.
///
/// When desired_channels is set, if the input contains fewer channels than this
/// then the last channel will be duplicated to give the requested number, else if
/// the input has more channels than requested then the additional channels will be
/// ignored.
///
/// If desired_samples is set, then the audio will be cropped or padded with zeroes
/// to the requested length.
///
/// The first output contains a Tensor with the content of the audio samples. The
/// lowest (innermost) dimension will be the number of channels, and the dimension
/// above it will be the number of samples. For example, a ten-sample-long stereo
/// WAV file should give an output shape of [10, 2].
///
/// Arguments:
/// * scope: A Scope object
/// * contents: The WAV-encoded audio, usually from a file.
///
/// Optional attributes (see `Attrs`):
/// * desired_channels: Number of sample channels wanted.
/// * desired_samples: Length of audio requested.
///
/// Returns:
/// * `Output` audio: 2-D with shape `[length, channels]`.
/// * `Output` sample_rate: Scalar holding the sample rate found in the WAV header.
class DecodeWav {
 public:
  /// Holder for the optional attributes accepted by DecodeWav.
  ///
  /// Each setter leaves the receiver untouched and hands back a modified
  /// copy, so calls can be chained.
  struct Attrs {
    /// How many sample channels the caller wants.
    ///
    /// Defaults to -1
    TF_MUST_USE_RESULT Attrs DesiredChannels(int64 x) {
      Attrs updated(*this);
      updated.desired_channels_ = x;
      return updated;
    }

    /// Requested length of the audio.
    ///
    /// Defaults to -1
    TF_MUST_USE_RESULT Attrs DesiredSamples(int64 x) {
      Attrs updated(*this);
      updated.desired_samples_ = x;
      return updated;
    }

    int64 desired_channels_ = -1;
    int64 desired_samples_ = -1;
  };

  /// Overload that takes no optional attributes.
  DecodeWav(const ::tensorflow::Scope& scope, ::tensorflow::Input contents);

  /// Overload that additionally takes optional attributes through `attrs`.
  DecodeWav(const ::tensorflow::Scope& scope,
            ::tensorflow::Input contents,
            const DecodeWav::Attrs& attrs);

  /// Shorthand: default-constructed Attrs with desired_channels set to `x`.
  static Attrs DesiredChannels(int64 x) {
    Attrs defaults;
    return defaults.DesiredChannels(x);
  }

  /// Shorthand: default-constructed Attrs with desired_samples set to `x`.
  static Attrs DesiredSamples(int64 x) {
    Attrs defaults;
    return defaults.DesiredSamples(x);
  }

  // This op has two outputs, so it offers no implicit conversion to a single
  // Output/Input; read the members below directly.
  Operation operation;
  ::tensorflow::Output audio;
  ::tensorflow::Output sample_rate;
};

/// Encode audio data using the WAV file format.
///
/// This operation will generate a string suitable to be saved out to create a .wav
/// audio file. It will be encoded in the 16-bit PCM format. It takes in float
/// values in the range -1.0f to 1.0f, and any values outside that range will be
/// clamped to it.
///
/// `audio` is a 2-D float Tensor of shape `[length, channels]`.
/// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).
///
/// Arguments:
/// * scope: A Scope object
/// * audio: 2-D with shape `[length, channels]`.
/// * sample_rate: Scalar containing the sample frequency.
///
/// Returns:
/// * `Output`: 0-D. WAV-encoded file contents.
class EncodeWav {
 public:
  /// Takes the `audio` and `sample_rate` inputs; this op has no optional
  /// attributes.
  EncodeWav(const ::tensorflow::Scope& scope,
            ::tensorflow::Input audio,
            ::tensorflow::Input sample_rate);

  /// Implicit conversions that expose the `contents` output.
  operator ::tensorflow::Output() const {
    return contents;
  }
  operator ::tensorflow::Input() const {
    return contents;
  }

  /// Node that backs the `contents` output.
  ::tensorflow::Node* node() const {
    return contents.node();
  }

  Operation operation;
  ::tensorflow::Output contents;
};

/// Transforms a spectrogram into a form that's useful for speech recognition.
///
/// Mel Frequency Cepstral Coefficients are a way of representing audio data that's
/// been effective as an input feature for machine learning. They are created by
/// taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the
/// higher frequencies that are less significant to the human ear. They have a long
/// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
/// is a good resource to learn more.
///
/// Arguments:
/// * scope: A Scope object
/// * spectrogram: Typically produced by the AudioSpectrogram op, with
/// magnitude_squared set to true.
/// * sample_rate: How many samples per second the source audio used.
///
/// Optional attributes (see `Attrs`):
/// * upper_frequency_limit: The highest frequency to use when calculating the
/// cepstrum.
/// * lower_frequency_limit: The lowest frequency to use when calculating the
/// cepstrum.
/// * filterbank_channel_count: Resolution of the Mel bank used internally.
/// * dct_coefficient_count: How many output channels to produce per time slice.
///
/// Returns:
/// * `Output`: The output tensor.
class Mfcc {
 public:
  /// Holder for the optional attributes accepted by Mfcc.
  ///
  /// Each setter leaves the receiver untouched and hands back a modified
  /// copy, so calls can be chained.
  struct Attrs {
    /// Upper bound on the frequencies used when calculating the cepstrum.
    ///
    /// Defaults to 4000
    TF_MUST_USE_RESULT Attrs UpperFrequencyLimit(float x) {
      Attrs updated(*this);
      updated.upper_frequency_limit_ = x;
      return updated;
    }

    /// Lower bound on the frequencies used when calculating the cepstrum.
    ///
    /// Defaults to 20
    TF_MUST_USE_RESULT Attrs LowerFrequencyLimit(float x) {
      Attrs updated(*this);
      updated.lower_frequency_limit_ = x;
      return updated;
    }

    /// Resolution of the Mel bank that is used internally.
    ///
    /// Defaults to 40
    TF_MUST_USE_RESULT Attrs FilterbankChannelCount(int64 x) {
      Attrs updated(*this);
      updated.filterbank_channel_count_ = x;
      return updated;
    }

    /// Number of output channels produced for each time slice.
    ///
    /// Defaults to 13
    TF_MUST_USE_RESULT Attrs DctCoefficientCount(int64 x) {
      Attrs updated(*this);
      updated.dct_coefficient_count_ = x;
      return updated;
    }

    float upper_frequency_limit_ = 4000.0f;
    float lower_frequency_limit_ = 20.0f;
    int64 filterbank_channel_count_ = 40;
    int64 dct_coefficient_count_ = 13;
  };

  /// Overload that takes no optional attributes.
  Mfcc(const ::tensorflow::Scope& scope,
       ::tensorflow::Input spectrogram,
       ::tensorflow::Input sample_rate);

  /// Overload that additionally takes optional attributes through `attrs`.
  Mfcc(const ::tensorflow::Scope& scope,
       ::tensorflow::Input spectrogram,
       ::tensorflow::Input sample_rate,
       const Mfcc::Attrs& attrs);

  /// Implicit conversions that expose the `output` tensor.
  operator ::tensorflow::Output() const {
    return output;
  }
  operator ::tensorflow::Input() const {
    return output;
  }

  /// Node that backs the `output` tensor.
  ::tensorflow::Node* node() const {
    return output.node();
  }

  /// Shorthand: default-constructed Attrs with upper_frequency_limit set to `x`.
  static Attrs UpperFrequencyLimit(float x) {
    Attrs defaults;
    return defaults.UpperFrequencyLimit(x);
  }

  /// Shorthand: default-constructed Attrs with lower_frequency_limit set to `x`.
  static Attrs LowerFrequencyLimit(float x) {
    Attrs defaults;
    return defaults.LowerFrequencyLimit(x);
  }

  /// Shorthand: default-constructed Attrs with filterbank_channel_count set
  /// to `x`.
  static Attrs FilterbankChannelCount(int64 x) {
    Attrs defaults;
    return defaults.FilterbankChannelCount(x);
  }

  /// Shorthand: default-constructed Attrs with dct_coefficient_count set to `x`.
  static Attrs DctCoefficientCount(int64 x) {
    Attrs defaults;
    return defaults.DctCoefficientCount(x);
  }

  Operation operation;
  ::tensorflow::Output output;
};

/// @}

}  // namespace ops
}  // namespace tensorflow

#endif  // TENSORFLOW_CC_OPS_AUDIO_OPS_H_