from __future__ import print_function, unicode_literals

import contextlib
import datetime
import json
import locale
import logging
import os
import sqlite3
import threading

import six
from six.moves.urllib.request import urlopen

from rbtools.api.errors import CacheError
from rbtools.utils.appdirs import user_cache_dir


MINIMUM_VERSION = '2.0.14'  # Minimum server version to enable the API cache.

_locale_lock = threading.Lock()  # Lock for getting / setting locale.


class CacheEntry(object):
    """An entry in the API cache."""

    DATE_FORMAT = '%Y-%m-%dT%H:%M:%S'  # ISO date format

    def __init__(self, url, vary_headers, max_age, etag, local_date,
                 last_modified, mime_type, item_mime_type, response_body):
        """Create a new cache entry."""
        self.url = url
        self.vary_headers = vary_headers
        self.max_age = max_age
        self.etag = etag
        self.local_date = local_date
        self.last_modified = last_modified
        self.mime_type = mime_type
        self.item_mime_type = item_mime_type
        self.response_body = response_body

    def matches_request(self, request):
        """Determine if the cache entry matches the given request.

        This is done by comparing the vary headers stored in the entry to
        the corresponding headers in the request.
        """
        if self.vary_headers:
            for header, value in six.iteritems(self.vary_headers):
                if request.headers.get(header) != value:
                    return False

        return True

    def up_to_date(self):
        """Determine if the cache entry is up to date."""
        if self.max_age is not None:
            max_age = datetime.timedelta(seconds=self.max_age)
            return self.local_date + max_age > datetime.datetime.now()

        return True


class HTTPResponse(object):
    """An uncached HTTP response that can be read() more than once.

    This is intended to be API-compatible with a urllib2 response object.
    This allows a response to be read more than once.
    """

    def __init__(self, response):
        """Extract the data from a urllib2 HTTP response."""
        self.headers = response.info()
        self.content = response.read()
        self.code = response.getcode()

    def info(self):
        """Get the headers associated with the response."""
        return self.headers

    def read(self):
        """Get the content associated with the response."""
        return self.content

    def getcode(self):
        """Get the associated HTTP response code."""
        return self.code


class CachedHTTPResponse(object):
    """A response returned from the APICache.

    This is intended to be API-compatible with a urllib2 response object.
    """

    def __init__(self, cache_entry):
        """Create a new CachedHTTPResponse from the given CacheEntry."""
        self.headers = {
            'Content-Type': cache_entry.mime_type,
            'Item-Content-Type': cache_entry.item_mime_type,
        }
        self.content = cache_entry.response_body

    def info(self):
        """Get the headers associated with the response."""
        return self.headers

    def read(self):
        """Get the content associated with the response."""
        return self.content

    def getcode(self):
        """Get the associated HTTP response code, which is always 200.

        This method returns 200 because it is pretending that it made a
        successful HTTP request.
        """
        return 200


class APICache(object):
    """An API cache backed by a SQLite database."""

    # The format for the Expires: header. Requires an English locale.
    EXPIRES_FORMAT = '%a, %d %b %Y %H:%M:%S %Z'

    DEFAULT_CACHE_DIR = user_cache_dir('rbtools')
    DEFAULT_CACHE_PATH = os.path.join(DEFAULT_CACHE_DIR, 'apicache.db')

    # The API cache's schema version. If the schema is updated, update this
    # value.
    SCHEMA_VERSION = 2

    def __init__(self, create_db_in_memory=False, db_location=None,
                 urlopen=urlopen):
        """Create a new instance of the APICache.

        If db_location is provided, it will be used as the path to the
        SQLite database; otherwise, the default cache (in the
        DEFAULT_CACHE_DIR) will be used.

        The urlopen parameter determines the method that is used to open
        URLs.
        """
        self.urlopen = urlopen

        # The database handle. This stays None if the cache database cannot
        # be created or opened, in which case requests bypass the cache.
        self.db = None

        if create_db_in_memory:
            logging.debug('Creating API cache in memory.')

            self.db = sqlite3.connect(':memory:')
            self.cache_path = None
            self._create_schema()
        else:
            self.cache_path = db_location or self.DEFAULT_CACHE_PATH

            try:
                cache_exists = os.path.exists(self.cache_path)
                create_schema = True

                if not cache_exists:
                    cache_dir = os.path.dirname(self.cache_path)

                    if not os.path.exists(cache_dir):
                        logging.debug('Cache directory "%s" does not exist; '
                                      'creating.',
                                      cache_dir)
                        os.makedirs(cache_dir)

                    logging.debug('API cache "%s" does not exist; creating.',
                                  self.cache_path)

                self.db = sqlite3.connect(self.cache_path)

                if cache_exists:
                    try:
                        with contextlib.closing(self.db.cursor()) as c:
                            c.execute('SELECT version FROM cache_info')
                            row = c.fetchone()

                            if row and row[0] == self.SCHEMA_VERSION:
                                create_schema = False
                    except sqlite3.Error as e:
                        self._die(
                            'Could not get the HTTP cache schema version', e)

                if create_schema:
                    self._create_schema()
            except (OSError, sqlite3.Error):
                # OSError will be thrown if we cannot create the directory or
                # file for the API cache. sqlite3.Error will be thrown if
                # connect fails. In either case, HTTP requests can still be
                # made; they will just be passed through to the URL opener
                # without attempting to interact with the API cache.
                logging.warn('Could not create or access API cache "%s". Try '
                             'running "rbt clear-cache" to clear the HTTP '
                             'cache for the API.',
                             self.cache_path)

        if self.db is not None:
            self.db.row_factory = APICache._row_factory

    def make_request(self, request):
        """Perform the specified request.

        If there is an up-to-date cached entry in our store, a
        CachedHTTPResponse will be returned. Otherwise, the urlopen method
        will be used to execute the request, and either a CachedHTTPResponse
        (if our entry is still up to date) or an HTTPResponse (if it is not)
        will be returned.
        """
        if self.db is None or request.method != 'GET':
            # We can only cache HTTP GET requests and only if we were able to
            # access the API cache database.
            return self.urlopen(request)

        entry = self._get_entry(request)

        if entry:
            if entry.up_to_date():
                logging.debug('Cached response for HTTP GET %s up to date',
                              request.get_full_url())
                response = CachedHTTPResponse(entry)
            else:
                if entry.etag:
                    request.add_header('If-none-match', entry.etag)

                if entry.last_modified:
                    request.add_header('If-modified-since',
                                       entry.last_modified)

                response = HTTPResponse(self.urlopen(request))

                if response.getcode() == 304:
                    logging.debug('Cached response for HTTP GET %s expired '
                                  'and was not modified',
                                  request.get_full_url())
                    entry.local_date = datetime.datetime.now()
                    self._save_entry(entry)
                    response = CachedHTTPResponse(entry)
                elif 200 <= response.getcode() < 300:
                    logging.debug('Cached response for HTTP GET %s expired '
                                  'and was modified',
                                  request.get_full_url())

                    response_headers = response.info()
                    cache_info = self._get_caching_info(request.headers,
                                                        response_headers)

                    if cache_info:
                        entry.max_age = cache_info['max_age']
                        entry.etag = cache_info['etag']
                        entry.local_date = datetime.datetime.now()
                        entry.last_modified = cache_info['last_modified']
                        entry.mime_type = response_headers['Content-Type']
                        entry.item_mime_type = \
                            response_headers.get('Item-Content-Type')
                        entry.response_body = response.read()

                        if entry.vary_headers != cache_info['vary_headers']:
                            # The Vary: header has changed since the last
                            # time we retrieved the resource so we need to
                            # remove the old cache entry and save the new
                            # one.
                            self._delete_entry(entry)
                            entry.vary_headers = cache_info['vary_headers']

                        self._save_entry(entry)
                    else:
                        # This resource is no longer cache-able so we should
                        # delete our cached version.
                        logging.debug('Cached response for HTTP GET request '
                                      'to %s is no longer cacheable',
                                      request.get_full_url())
                        self._delete_entry(entry)
        else:
            response = HTTPResponse(self.urlopen(request))
            response_headers = response.info()
            cache_info = self._get_caching_info(request.headers,
                                                response_headers)

            if cache_info:
                self._save_entry(CacheEntry(
                    request.get_full_url(),
                    cache_info['vary_headers'],
                    cache_info['max_age'],
                    cache_info['etag'],
                    datetime.datetime.now(),
                    cache_info['last_modified'],
                    response_headers.get('Content-Type'),
                    response_headers.get('Item-Content-Type'),
                    response.read()))

                logging.debug('Added cache entry for HTTP GET request to %s',
                              request.get_full_url())
            else:
                logging.debug('HTTP GET request to %s cannot be cached',
                              request.get_full_url())

        return response

    def _get_caching_info(self, request_headers, response_headers):
        """Get the caching info for the response to the given request.

        A dictionary with caching information is returned, or None if the
        response cannot be cached.
        """
        max_age = None
        no_cache = False

        expires = response_headers.get('Expires')

        if expires:
            # We switch to the C locale to parse the 'Expires' header because
            # the formatting specifiers are locale specific and the header
            # *must* be provided in English. After parsing the header, we
            # restore the locale to the user's previous locale.
            #
            # We also note that changing the locale is not thread-safe, so we
            # use a lock around this.
            with _locale_lock:
                old_locale = locale.setlocale(locale.LC_TIME)

                try:
                    # 'setlocale' requires the second parameter to be a 'str'
                    # in both Python 2.x and Python 3+.
                    locale.setlocale(locale.LC_TIME, str('C'))

                    expires = datetime.datetime.strptime(
                        expires, self.EXPIRES_FORMAT)

                    # We assign to max_age because the value of max-age in
                    # the Cache-Control header overrides the behaviour of the
                    # 'Expires' header.
                    now = datetime.datetime.now()

                    if expires < now:
                        max_age = 0
                    else:
                        # Use total_seconds() so that expiration dates more
                        # than a day in the future are not truncated.
                        max_age = int((expires - now).total_seconds())
                except ValueError:
                    logging.error('The format of the "Expires" header (value '
                                  '%s) does not match the expected format.',
                                  expires)
                except locale.Error:
                    logging.error('The C locale is unavailable on this '
                                  'system. The "Expires" header cannot be '
                                  'parsed.')
                finally:
                    locale.setlocale(locale.LC_TIME, old_locale)

        # The value of the Cache-Control header is a list of comma separated
        # values. We only care about some of them, notably max-age, no-cache,
        # no-store, and must-revalidate. The other values are only applicable
        # to intermediaries.
        for kvp in self._split_csv(response_headers.get('Cache-Control', '')):
            if kvp.startswith('max-age'):
                max_age = int(kvp.split('=')[1].strip())
            elif kvp.startswith('no-cache'):
                # The no-cache specifier optionally has an associated header
                # that we shouldn't cache. However, the *only* headers we are
                # caching are headers that describe the cached content:
                # Content-Type, and Item-Content-Type.
                no_cache = True
            elif kvp == 'no-store':
                # If no-store is specified, we cannot cache anything about
                # this resource.
                return None
            elif kvp == 'must-revalidate':
                # We treat must-revalidate identically to no-cache because we
                # are not an intermediary.
                no_cache = True

        # The Pragma: header is an obsolete header that may contain the value
        # no-cache, which is equivalent to Cache-Control: no-cache. We check
        # for it for posterity's sake.
        if 'no-cache' in response_headers.get('Pragma', ''):
            no_cache = True

        etag = response_headers.get('ETag')
        last_modified = response_headers.get('Last-Modified')
        vary_headers = response_headers.get('Vary')

        # The Vary header specifies a list of headers that *may* alter the
        # returned response. The cached response can only be used when these
        # headers have the same value as those provided in the request.
        if vary_headers:
            vary_headers = dict(
                (header, request_headers.get(header))
                for header in self._split_csv(vary_headers)
            )
        else:
            vary_headers = {}

        if no_cache:
            # If no-cache is specified, the resource must always be
            # requested, so we will treat this as if the max_age is zero.
            max_age = 0

        if no_cache and not etag and not last_modified:
            # We have no information with which to provide the server to
            # check if our content is up to date. Therefore, the information
            # cannot be cached.
            return None

        return {
            'max_age': max_age,
            'etag': etag,
            'last_modified': last_modified,
            'vary_headers': vary_headers,
        }

    def _create_schema(self):
        """Create the schema for the API cache database."""
        try:
            with contextlib.closing(self.db.cursor()) as c:
                c.execute('DROP TABLE IF EXISTS api_cache')
                c.execute('DROP TABLE IF EXISTS cache_info')

                c.execute('''CREATE TABLE api_cache(
                                 url TEXT,
                                 vary_headers TEXT,
                                 max_age INTEGER,
                                 etag TEXT,
                                 local_date TEXT,
                                 last_modified TEXT,
                                 mime_type TEXT,
                                 item_mime_type TEXT,
                                 response_body BLOB,

                                 PRIMARY KEY(url, vary_headers)
                             )''')

                c.execute('CREATE TABLE cache_info(version INTEGER)')
                c.execute('INSERT INTO cache_info(version) VALUES(?)',
                          (self.SCHEMA_VERSION,))

            self._write_db()
        except sqlite3.Error as e:
            self._die('Could not create database schema for the HTTP cache',
                      e)

    def _get_entry(self, request):
        """Find an entry in the API cache store that matches the request.

        If no such cache entry exists, this returns None.
""" url = request.get_full_url() try: with contextlib.closing(self.db.cursor()) as c: for row in c.execute('SELECT * FROM api_cache WHERE url=?', (url,)): if row.matches_request(request): return row except sqlite3.Error as e: self._die('Could not retrieve an entry from the HTTP cache', e) return None def _save_entry(self, entry): """Save the entry into the store. If the entry already exists in the store, do an UPDATE; otherwise do an INSERT. This does not commit to the database. """ vary_headers = json.dumps(entry.vary_headers) local_date = entry.local_date.strftime(entry.DATE_FORMAT) try: with contextlib.closing(self.db.cursor()) as c: try: c.execute('''INSERT INTO api_cache (url, vary_headers, max_age, etag, local_date, last_modified, mime_type, item_mime_type, response_body) VALUES(?,?,?,?,?,?,?,?,?)''', (entry.url, vary_headers, entry.max_age, entry.etag, local_date, entry.last_modified, entry.mime_type, entry.item_mime_type, sqlite3.Binary(entry.response_body))) except sqlite3.IntegrityError: c.execute('''UPDATE api_cache SET max_age=?, etag=?, local_date=?, last_modified=?, mime_type=?, item_mime_type=?, response_body=? WHERE url=? AND vary_headers=?''', (entry.max_age, entry.etag, local_date, entry.last_modified, entry.mime_type, entry.item_mime_type, sqlite3.Binary(entry.response_body), entry.url, vary_headers)) self._write_db() except sqlite3.Error as e: self._die('Could not write entry to the HTTP cache for the API', e) def _delete_entry(self, entry): """Remove the entry from the store.""" try: with contextlib.closing(self.db.cursor()) as c: c.execute( 'DELETE FROM api_cache WHERE URL=? AND vary_headers=?', (entry.url, json.dumps(entry.vary_headers))) self._write_db() except sqlite3.Error as e: self._die('Could not delete entry from the HTTP cache for the API', e) @staticmethod def _row_factory(cursor, row): """A factory for creating individual Cache Entries from db rows.""" return CacheEntry( url=row[0], vary_headers=json.loads(row[1]), max_age=row[2], etag=row[3], local_date=datetime.datetime.strptime(row[4], CacheEntry.DATE_FORMAT), last_modified=row[5], mime_type=row[6], item_mime_type=row[7], response_body=six.binary_type(row[8]), ) def _write_db(self): """Flush the contents of the DB to the disk.""" if self.db: try: self.db.commit() except sqlite3.Error as e: self._die('Could not write database to disk', e) def _die(self, message, inner_exception): """Build an appropriate CacheError and raise it.""" message = '%s: %s.' % (message, inner_exception) if self.cache_path: if self.cache_path == APICache.DEFAULT_CACHE_PATH: cache_args = '' else: cache_args = ' --cache-location %s' % self.cache_path message += (' Try running "rbt clear-cache%s" to manually clear ' 'the HTTP Cache for the API.' % cache_args) raise CacheError(message) def _split_csv(self, csvline): """Split a line of comma-separated values into a list.""" return [ s.strip() for s in csvline.split(',') ] def clear_cache(cache_path=APICache.DEFAULT_CACHE_PATH): """Delete the HTTP cache used for the API.""" try: os.unlink(cache_path) print('Cleared cache in "%s"' % cache_path) except Exception as e: logging.error('Could not clear cache in "%s": %s. Try manually ' 'removing it if it exists.', cache_path, e)