EVOLUTION-MANAGER

Edit File: cpl_csv.cpp

/******************************************************************************
 *
 * Project:  CPL - Common Portability Library
 * Purpose:  CSV (comma separated value) file access.
 * Author:   Frank Warmerdam, warmerdam@pobox.com
 *
 ******************************************************************************
 * Copyright (c) 1999, Frank Warmerdam
 * Copyright (c) 2009-2012, Even Rouault <even dot rouault at mines-paris dot org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 ****************************************************************************/

#include "cpl_port.h"
#include "cpl_csv.h"

#include <cstddef>
#include <cstdlib>
#include <cstring>
#if HAVE_FCNTL_H
#include <fcntl.h>
#endif

#include "cpl_conv.h"
#include "cpl_error.h"
#include "cpl_multiproc.h"
#include "gdal_csv.h"

CPL_CVSID("$Id: cpl_csv.cpp 37621 2017-03-06 11:38:25Z rouault $");

/* ==================================================================== */
/*      The CSVTable is a persistent set of info about an open CSV      */
/*      table.  While it doesn't currently maintain a record index,     */
/*      or in-memory copy of the table, it could be changed to do so    */
/*      in the future.                                                  */
/* ==================================================================== */
typedef struct ctb {
    VSILFILE   *fp;
    struct ctb *psNext;
    char       *pszFilename;
    char      **papszFieldNames;
    char      **papszRecFields;
    int         iLastLine;
    bool        bNonUniqueKey;

/* Cache for whole file */
    int         nLineCount;
    char      **papszLines;
    int        *panLineIndex;
    char       *pszRawData;
} CSVTable;

static void CSVDeaccessInternal( CSVTable **ppsCSVTableList, bool bCanUseTLS,
                                 const char * pszFilename );

/************************************************************************/
/*                            CSVFreeTLS()                              */
/************************************************************************/
static void CSVFreeTLS( void* pData )
{
    CSVDeaccessInternal( static_cast<CSVTable **>( pData ), false, NULL );
    CPLFree(pData);
}

/* It would likely be better to share this list between threads, but
   that will require some rework. */

/************************************************************************/
/*                             CSVAccess()                              */
/*                                                                      */
/*      This function will fetch a handle to the requested table.       */
/*      If not found in the ``open table list'' the table will be       */
/*      opened and added to the list.  Eventually this function may     */
/*      become public with an abstracted return type so that            */
/*      applications can set options about the table.  For now this     */
/*      isn't done.                                                     */
/************************************************************************/

static CSVTable *CSVAccess( const char * pszFilename )

{
/* -------------------------------------------------------------------- */
/*      Fetch the table, and allocate the thread-local pointer to it    */
/*      if there isn't already one.                                     */
/* -------------------------------------------------------------------- */
    int bMemoryError = FALSE;
    CSVTable **ppsCSVTableList = static_cast<CSVTable **>(
        CPLGetTLSEx( CTLS_CSVTABLEPTR, &bMemoryError ) );
    if( bMemoryError )
        return NULL;
    if( ppsCSVTableList == NULL )
    {
        ppsCSVTableList = static_cast<CSVTable **>(
            VSI_CALLOC_VERBOSE( 1, sizeof(CSVTable*) ) );
        if( ppsCSVTableList == NULL )
            return NULL;
        CPLSetTLSWithFreeFunc( CTLS_CSVTABLEPTR, ppsCSVTableList, CSVFreeTLS );
    }

/* -------------------------------------------------------------------- */
/*      Is the table already in the list.                               */
/* -------------------------------------------------------------------- */
    for( CSVTable *psTable = *ppsCSVTableList;
         psTable != NULL;
         psTable = psTable->psNext )
    {
        if( EQUAL(psTable->pszFilename, pszFilename) )
        {
            /*
             * Eventually we should consider promoting to the front of
             * the list to accelerate frequently accessed tables.
             */
            return psTable;
        }
    }

/* -------------------------------------------------------------------- */
/*      If not, try to open it.                                         */
/* -------------------------------------------------------------------- */
    VSILFILE *fp = VSIFOpenL( pszFilename, "rb" );
    if( fp == NULL )
        return NULL;

/* -------------------------------------------------------------------- */
/*      Create an information structure about this table, and add to    */
/*      the front of the list.                                          */
/* -------------------------------------------------------------------- */
    CSVTable * const psTable = static_cast<CSVTable *>(
        VSI_CALLOC_VERBOSE( sizeof(CSVTable), 1 ) );
    if( psTable == NULL )
    {
        VSIFCloseL(fp);
        return NULL;
    }

psTable->fp = fp;
    psTable->pszFilename = VSI_STRDUP_VERBOSE( pszFilename );
    if( psTable->pszFilename == NULL )
    {
        VSIFree(psTable);
        VSIFCloseL(fp);
        return NULL;
    }
    psTable->bNonUniqueKey = false;  // As far as we know now.
    psTable->psNext = *ppsCSVTableList;

*ppsCSVTableList = psTable;

/* -------------------------------------------------------------------- */
/*      Read the table header record containing the field names.        */
/* -------------------------------------------------------------------- */
    psTable->papszFieldNames = CSVReadParseLineL( fp );

return psTable;
}

/************************************************************************/
/*                            CSVDeaccess()                             */
/************************************************************************/

static void CSVDeaccessInternal( CSVTable **ppsCSVTableList, bool bCanUseTLS,
                                 const char * pszFilename )

{
    if( ppsCSVTableList == NULL )
        return;

/* -------------------------------------------------------------------- */
/*      A NULL means deaccess all tables.                               */
/* -------------------------------------------------------------------- */
    if( pszFilename == NULL )
    {
        while( *ppsCSVTableList != NULL )
            CSVDeaccessInternal( ppsCSVTableList, bCanUseTLS,
                                 (*ppsCSVTableList)->pszFilename );

return;
    }

/* -------------------------------------------------------------------- */
/*      Find this table.                                                */
/* -------------------------------------------------------------------- */
    CSVTable *psLast = NULL;
    CSVTable *psTable = *ppsCSVTableList;
    for( ;
         psTable != NULL && !EQUAL(psTable->pszFilename, pszFilename);
         psTable = psTable->psNext )
    {
        psLast = psTable;
    }

if( psTable == NULL )
    {
        if( bCanUseTLS )
            CPLDebug( "CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename );
        return;
    }

/* -------------------------------------------------------------------- */
/*      Remove the link from the list.                                  */
/* -------------------------------------------------------------------- */
    if( psLast != NULL )
        psLast->psNext = psTable->psNext;
    else
        *ppsCSVTableList = psTable->psNext;

/* -------------------------------------------------------------------- */
/*      Free the table.                                                 */
/* -------------------------------------------------------------------- */
    if( psTable->fp != NULL )
        VSIFCloseL( psTable->fp );

CSLDestroy( psTable->papszFieldNames );
    CSLDestroy( psTable->papszRecFields );
    CPLFree( psTable->pszFilename );
    CPLFree( psTable->panLineIndex );
    CPLFree( psTable->pszRawData );
    CPLFree( psTable->papszLines );

CPLFree( psTable );

if( bCanUseTLS )
        CPLReadLine( NULL );
}

void CSVDeaccess( const char * pszFilename )
{
/* -------------------------------------------------------------------- */
/*      Fetch the table, and allocate the thread-local pointer to it    */
/*      if there isn't already one.                                     */
/* -------------------------------------------------------------------- */
    int bMemoryError = FALSE;
    CSVTable **ppsCSVTableList = static_cast<CSVTable **>(
        CPLGetTLSEx( CTLS_CSVTABLEPTR, &bMemoryError ) );

CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);
}

/************************************************************************/
/*                            CSVSplitLine()                            */
/*                                                                      */
/*      Tokenize a CSV line into fields in the form of a string         */
/*      list.  This is used instead of the CPLTokenizeString()          */
/*      because it provides correct CSV escaping and quoting            */
/*      semantics.                                                      */
/************************************************************************/

static char **CSVSplitLine( const char *pszString, char chDelimiter )

{

char *pszToken = static_cast<char *>( VSI_CALLOC_VERBOSE( 10, 1 ) );
    if( pszToken == NULL )
        return NULL;

int nTokenMax = 10;
    char **papszRetList = NULL;

while( pszString != NULL && *pszString != '\0' )
    {
        bool bInString = false;
        int nTokenLen = 0;

/* Try to find the next delimiter, marking end of token */
        for( ; *pszString != '\0'; pszString++ )
        {

/* End if this is a delimiter skip it and break. */
            if( !bInString && *pszString == chDelimiter )
            {
                pszString++;
                break;
            }

if( *pszString == '"' )
            {
                if( !bInString || pszString[1] != '"' )
                {
                    bInString = !bInString;
                    continue;
                }
                else  /* doubled quotes in string resolve to one quote */
                {
                    pszString++;
                }
            }

if( nTokenLen >= nTokenMax-2 )
            {
                nTokenMax = nTokenMax * 2 + 10;
                char* pszTokenNew = static_cast<char *>(
                    VSI_REALLOC_VERBOSE( pszToken, nTokenMax ) );
                if( pszTokenNew == NULL )
                {
                    VSIFree(pszToken);
                    CSLDestroy(papszRetList);
                    return NULL;
                }
                pszToken = pszTokenNew;
            }

pszToken[nTokenLen] = *pszString;
            nTokenLen++;
        }

pszToken[nTokenLen] = '\0';
        char** papszRetListNew = CSLAddStringMayFail( papszRetList, pszToken );
        if( papszRetListNew == NULL )
        {
            VSIFree(pszToken);
            CSLDestroy(papszRetList);
            return NULL;
        }
        papszRetList = papszRetListNew;

/* If the last token is an empty token, then we have to catch
         * it now, otherwise we won't reenter the loop and it will be lost.
         */
        if( *pszString == '\0' && *(pszString-1) == chDelimiter )
        {
            papszRetListNew = CSLAddStringMayFail( papszRetList, "" );
            if( papszRetListNew == NULL )
            {
                VSIFree(pszToken);
                CSLDestroy(papszRetList);
                return NULL;
            }
            papszRetList = papszRetListNew;
        }
    }

VSIFree( pszToken );

return papszRetList;
}

/************************************************************************/
/*                          CSVFindNextLine()                           */
/*                                                                      */
/*      Find the start of the next line, while at the same time zero    */
/*      terminating this line.  Take into account that there may be     */
/*      newline indicators within quoted strings, and that quotes       */
/*      can be escaped with a backslash.                                */
/************************************************************************/

static char *CSVFindNextLine( char *pszThisLine )

{
    int i = 0;  // i is used after the for loop.

for( int nQuoteCount = 0; pszThisLine[i] != '\0'; i++ )
    {
        if( pszThisLine[i] == '\"'
            && (i == 0 || pszThisLine[i-1] != '\\') )
            nQuoteCount++;

if( (pszThisLine[i] == 10 || pszThisLine[i] == 13)
            && (nQuoteCount % 2) == 0 )
            break;
    }

while( pszThisLine[i] == 10 || pszThisLine[i] == 13 )
        pszThisLine[i++] = '\0';

if( pszThisLine[i] == '\0' )
        return NULL;

return pszThisLine + i;
}

/************************************************************************/
/*                             CSVIngest()                              */
/*                                                                      */
/*      Load entire file into memory and setup index if possible.       */
/************************************************************************/

// TODO(schwehr): Clean up all the casting in CSVIngest.
static void CSVIngest( const char *pszFilename )

{
    CSVTable *psTable = CSVAccess( pszFilename );
    if( psTable == NULL )
    {
        CPLError( CE_Failure, CPLE_FileIO, "Failed to open file: %s",
                  pszFilename );
        return;
    }

if( psTable->pszRawData != NULL )
        return;

/* -------------------------------------------------------------------- */
/*      Ingest whole file.                                              */
/* -------------------------------------------------------------------- */
    if( VSIFSeekL( psTable->fp, 0, SEEK_END ) != 0 )
    {
        CPLError( CE_Failure, CPLE_FileIO,
                  "Failed using seek end and tell to get file length: %s",
                  pszFilename );
        return;
    }
    const vsi_l_offset nFileLen = VSIFTellL( psTable->fp );
    if( static_cast<long>(nFileLen) == -1 )
    {
        CPLError( CE_Failure, CPLE_FileIO,
                  "Failed using seek end and tell to get file length: %s",
                  pszFilename );
        return;
    }
    VSIRewindL( psTable->fp );

psTable->pszRawData = static_cast<char *>(
        VSI_MALLOC_VERBOSE( static_cast<size_t>(nFileLen) + 1) );
    if( psTable->pszRawData == NULL )
        return;
    if( VSIFReadL( psTable->pszRawData, 1,
                   static_cast<size_t>(nFileLen), psTable->fp )
        != static_cast<size_t>(nFileLen) )
    {
        CPLFree( psTable->pszRawData );
        psTable->pszRawData = NULL;

CPLError( CE_Failure, CPLE_FileIO, "Read of file %s failed.",
                  psTable->pszFilename );
        return;
    }

psTable->pszRawData[nFileLen] = '\0';

/* -------------------------------------------------------------------- */
/*      Get count of newlines so we can allocate line array.            */
/* -------------------------------------------------------------------- */
    int nMaxLineCount = 0;
    for( int i = 0; i < static_cast<int>(nFileLen); i++ )
    {
        if( psTable->pszRawData[i] == 10 )
            nMaxLineCount++;
    }

psTable->papszLines = static_cast<char **>(
        VSI_CALLOC_VERBOSE( sizeof(char*), nMaxLineCount ) );
    if( psTable->papszLines == NULL )
        return;

/* -------------------------------------------------------------------- */
/*      Build a list of record pointers into the raw data buffer        */
/*      based on line terminators.  Zero terminate the line             */
/*      strings.                                                        */
/* -------------------------------------------------------------------- */
    /* skip header line */
    char *pszThisLine = CSVFindNextLine( psTable->pszRawData );

int iLine = 0;
    while( pszThisLine != NULL && iLine < nMaxLineCount )
    {
        if( pszThisLine[0] != '#' )
            psTable->papszLines[iLine++] = pszThisLine;
        pszThisLine = CSVFindNextLine( pszThisLine );
    }

psTable->nLineCount = iLine;

/* -------------------------------------------------------------------- */
/*      Allocate and populate index array.  Ensure they are in          */
/*      ascending order so that binary searches can be done on the      */
/*      array.                                                          */
/* -------------------------------------------------------------------- */
    psTable->panLineIndex = static_cast<int *>(
        VSI_MALLOC_VERBOSE( sizeof(int) * psTable->nLineCount ) );
    if( psTable->panLineIndex == NULL )
        return;

for( int i = 0; i < psTable->nLineCount; i++ )
    {
        psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);

if( i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i-1] )
        {
            CPLFree( psTable->panLineIndex );
            psTable->panLineIndex = NULL;
            break;
        }
    }

psTable->iLastLine = -1;

/* -------------------------------------------------------------------- */
/*      We should never need the file handle against, so close it.      */
/* -------------------------------------------------------------------- */
    VSIFCloseL( psTable->fp );
    psTable->fp = NULL;
}

/************************************************************************/
/*                        CSVDetectSeperator()                          */
/************************************************************************/

/** Detect which field separator is used.
 *
 * Currently, it can detect comma, semicolon, space or tabulation. In case of
 * ambiguity or no separator found, comma will be considered as the separator.
 *
 * @return ',', ';', ' ' or '\t'
 */
char CSVDetectSeperator( const char* pszLine )
{
    bool bInString = false;
    char chDelimiter = '\0';
    int nCountSpace = 0;

for( ; *pszLine != '\0'; pszLine++ )
    {
        if( !bInString && ( *pszLine == ',' || *pszLine == ';'
                            || *pszLine == '\t'))
        {
            if( chDelimiter == '\0' )
            {
                chDelimiter = *pszLine;
            }
            else if( chDelimiter != *pszLine )
            {
                // The separator is not consistent on the line.
                CPLDebug( "CSV", "Inconsistent separator. '%c' and '%c' found. "
                          "Using ',' as default",
                          chDelimiter, *pszLine);
                chDelimiter = ',';
                break;
            }
        }
        else if( !bInString && *pszLine == ' ' )
        {
            nCountSpace++;
        }
        else if( *pszLine == '"' )
        {
            if( !bInString || pszLine[1] != '"' )
            {
                bInString = !bInString;
                continue;
            }
            else  /* doubled quotes in string resolve to one quote */
            {
                pszLine++;
            }
        }
    }

if( chDelimiter == '\0' )
    {
        if( nCountSpace > 0 )
            chDelimiter = ' ';
        else
            chDelimiter = ',';
    }

return chDelimiter;
}

/************************************************************************/
/*                          CSVReadParseLine()                          */
/*                                                                      */
/*      Read one line, and return split into fields.  The return        */
/*      result is a stringlist, in the sense of the CSL functions.      */
/*                                                                      */
/*      Deprecated.  Replaced by CSVReadParseLineL().                   */
/************************************************************************/

char **CSVReadParseLine( FILE * fp )
{
    return CSVReadParseLine2(fp, ',');
}

char **CSVReadParseLine2( FILE * fp, char chDelimiter )

{
    CPLAssert( fp != NULL );
    if( fp == NULL )
        return NULL;

const char *pszLine = CPLReadLine( fp );
    if( pszLine == NULL )
        return NULL;

/* -------------------------------------------------------------------- */
/*      If there are no quotes, then this is the simple case.           */
/*      Parse, and return tokens.                                       */
/* -------------------------------------------------------------------- */
    if( strchr(pszLine, '\"') == NULL )
        return CSVSplitLine( pszLine, chDelimiter );

/* -------------------------------------------------------------------- */
/*      We must now count the quotes in our working string, and as      */
/*      long as it is odd, keep adding new lines.                       */
/* -------------------------------------------------------------------- */
    char *pszWorkLine = CPLStrdup( pszLine );

int i = 0;
    int nCount = 0;
    size_t nWorkLineLength = strlen(pszWorkLine);

while( true )
    {
        for( ; pszWorkLine[i] != '\0'; i++ )
        {
            if( pszWorkLine[i] == '\"'
                && (i == 0 || pszWorkLine[i-1] != '\\') )
                nCount++;
        }

if( nCount % 2 == 0 )
            break;

pszLine = CPLReadLine( fp );
        if( pszLine == NULL )
            break;

const size_t nLineLen = strlen(pszLine);

char* pszWorkLineTmp = static_cast<char *>(
            VSIRealloc(pszWorkLine,
                       nWorkLineLength + nLineLen + 2) );
        if( pszWorkLineTmp == NULL )
            break;
        pszWorkLine = pszWorkLineTmp;
        // The newline gets lost in CPLReadLine().
        strcat( pszWorkLine + nWorkLineLength, "\n" );
        strcat( pszWorkLine + nWorkLineLength, pszLine );

nWorkLineLength += nLineLen + 1;
    }

char **papszReturn = CSVSplitLine( pszWorkLine, chDelimiter );

CPLFree( pszWorkLine );

return papszReturn;
}

/************************************************************************/
/*                          CSVReadParseLineL()                         */
/*                                                                      */
/*      Read one line, and return split into fields.  The return        */
/*      result is a stringlist, in the sense of the CSL functions.      */
/*                                                                      */
/*      Replaces CSVReadParseLine().  These functions use the VSI       */
/*      layer to allow reading from other file containers.              */
/************************************************************************/

char **CSVReadParseLineL( VSILFILE * fp )
{
    return CSVReadParseLine2L(fp, ',');
}

char **CSVReadParseLine2L( VSILFILE * fp, char chDelimiter )

{
    CPLAssert( fp != NULL );
    if( fp == NULL )
        return NULL;

const char *pszLine = CPLReadLineL( fp );
    if( pszLine == NULL )
        return NULL;

int i = 0;
    int nCount = 0;
    size_t nWorkLineLength = strlen(pszWorkLine);

if( nCount % 2 == 0 )
            break;

pszLine = CPLReadLineL( fp );
        if( pszLine == NULL )
            break;

const size_t nLineLen = strlen(pszLine);

char* pszWorkLineTmp = static_cast<char *>(
            VSIRealloc(pszWorkLine,
                       nWorkLineLength + nLineLen + 2) );
        if( pszWorkLineTmp == NULL )
            break;

pszWorkLine = pszWorkLineTmp;
        // The newline gets lost in CPLReadLine().
        strcat( pszWorkLine + nWorkLineLength, "\n" );
        strcat( pszWorkLine + nWorkLineLength, pszLine );

nWorkLineLength += nLineLen + 1;
    }

char **papszReturn = CSVSplitLine( pszWorkLine, chDelimiter );

CPLFree( pszWorkLine );

return papszReturn;
}

/************************************************************************/
/*                             CSVCompare()                             */
/*                                                                      */
/*      Compare a field to a search value using a particular            */
/*      criteria.                                                       */
/************************************************************************/

static bool CSVCompare( const char * pszFieldValue, const char * pszTarget,
                       CSVCompareCriteria eCriteria )

{
    if( eCriteria == CC_ExactString )
    {
        return( strcmp( pszFieldValue, pszTarget ) == 0 );
    }
    else if( eCriteria == CC_ApproxString )
    {
        return EQUAL( pszFieldValue, pszTarget );
    }
    else if( eCriteria == CC_Integer )
    {
        return( CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&
                atoi(pszFieldValue) == atoi(pszTarget) );
    }

return false;
}

/************************************************************************/
/*                            CSVScanLines()                            */
/*                                                                      */
/*      Read the file scanline for lines where the key field equals     */
/*      the indicated value with the suggested comparison criteria.     */
/*      Return the first matching line split into fields.               */
/*                                                                      */
/*      Deprecated.  Replaced by CSVScanLinesL().                       */
/************************************************************************/

char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue,
                     CSVCompareCriteria eCriteria )

{
    CPLAssert( pszValue != NULL );
    CPLAssert( iKeyField >= 0 );
    CPLAssert( fp != NULL );

bool bSelected = false;
    const int nTestValue = atoi(pszValue);
    char **papszFields = NULL;

while( !bSelected ) {
        papszFields = CSVReadParseLine( fp );
        if( papszFields == NULL )
            return NULL;

if( CSLCount( papszFields ) < iKeyField+1 )
        {
            /* not selected */
        }
        else if( eCriteria == CC_Integer
                 && atoi(papszFields[iKeyField]) == nTestValue )
        {
            bSelected = true;
        }
        else
        {
            bSelected = CSVCompare( papszFields[iKeyField], pszValue,
                                    eCriteria );
        }

if( !bSelected )
        {
            CSLDestroy( papszFields );
            papszFields = NULL;
        }
    }

return papszFields;
}

/************************************************************************/
/*                            CSVScanLinesL()                           */
/*                                                                      */
/*      Read the file scanline for lines where the key field equals     */
/*      the indicated value with the suggested comparison criteria.     */
/*      Return the first matching line split into fields.               */
/************************************************************************/

char **CSVScanLinesL( VSILFILE *fp, int iKeyField, const char * pszValue,
                      CSVCompareCriteria eCriteria )

{
    CPLAssert( pszValue != NULL );
    CPLAssert( iKeyField >= 0 );
    CPLAssert( fp != NULL );

bool bSelected = false;
    const int nTestValue = atoi(pszValue);
    char **papszFields = NULL;

while( !bSelected ) {
        papszFields = CSVReadParseLineL( fp );
        if( papszFields == NULL )
            return NULL;

if( !bSelected )
        {
            CSLDestroy( papszFields );
            papszFields = NULL;
        }
    }

return papszFields;
}

/************************************************************************/
/*                        CSVScanLinesIndexed()                         */
/*                                                                      */
/*      Read the file scanline for lines where the key field equals     */
/*      the indicated value with the suggested comparison criteria.     */
/*      Return the first matching line split into fields.               */
/************************************************************************/

static char **
CSVScanLinesIndexed( CSVTable *psTable, int nKeyValue )

{
    CPLAssert( psTable->panLineIndex != NULL );

/* -------------------------------------------------------------------- */
/*      Find target record with binary search.                          */
/* -------------------------------------------------------------------- */
    int iTop = psTable->nLineCount-1;
    int iBottom = 0;
    int iResult = -1;

while( iTop >= iBottom )
    {
        const int iMiddle = (iTop + iBottom) / 2;
        if( psTable->panLineIndex[iMiddle] > nKeyValue )
            iTop = iMiddle - 1;
        else if( psTable->panLineIndex[iMiddle] < nKeyValue )
            iBottom = iMiddle + 1;
        else
        {
            iResult = iMiddle;
            // if a key is not unique, select the first instance of it.
            while( iResult > 0
                   && psTable->panLineIndex[iResult-1] == nKeyValue )
            {
                psTable->bNonUniqueKey = true;
                iResult--;
            }
            break;
        }
    }

if( iResult == -1 )
        return NULL;

/* -------------------------------------------------------------------- */
/*      Parse target line, and update iLastLine indicator.              */
/* -------------------------------------------------------------------- */
    psTable->iLastLine = iResult;

return CSVSplitLine( psTable->papszLines[iResult], ',' );
}

/************************************************************************/
/*                        CSVScanLinesIngested()                        */
/*                                                                      */
/*      Read the file scanline for lines where the key field equals     */
/*      the indicated value with the suggested comparison criteria.     */
/*      Return the first matching line split into fields.               */
/************************************************************************/

static char **
CSVScanLinesIngested( CSVTable *psTable, int iKeyField, const char * pszValue,
                      CSVCompareCriteria eCriteria )

{
    CPLAssert( pszValue != NULL );
    CPLAssert( iKeyField >= 0 );

const int nTestValue = atoi(pszValue);

/* -------------------------------------------------------------------- */
/*      Short cut for indexed files.                                    */
/* -------------------------------------------------------------------- */
    if( iKeyField == 0 && eCriteria == CC_Integer
        && psTable->panLineIndex != NULL )
        return CSVScanLinesIndexed( psTable, nTestValue );

/* -------------------------------------------------------------------- */
/*      Scan from in-core lines.                                        */
/* -------------------------------------------------------------------- */
    char **papszFields = NULL;
    bool bSelected = false;

while( !bSelected && psTable->iLastLine+1 < psTable->nLineCount ) {
        psTable->iLastLine++;
        papszFields = CSVSplitLine( psTable->papszLines[psTable->iLastLine],
                                    ',' );

if( !bSelected )
        {
            CSLDestroy( papszFields );
            papszFields = NULL;
        }
    }

return papszFields;
}

/************************************************************************/
/*                           CSVGetNextLine()                           */
/*                                                                      */
/*      Fetch the next line of a CSV file based on a passed in          */
/*      filename.  Returns NULL at end of file, or if file is not       */
/*      really established.                                             */
/************************************************************************/

char **CSVGetNextLine( const char *pszFilename )

{

/* -------------------------------------------------------------------- */
/*      Get access to the table.                                        */
/* -------------------------------------------------------------------- */
    CPLAssert( pszFilename != NULL );

CSVTable * const psTable = CSVAccess( pszFilename );
    if( psTable == NULL )
        return NULL;

/* -------------------------------------------------------------------- */
/*      If we use CSVGetNextLine() we can pretty much assume we have    */
/*      a non-unique key.                                               */
/* -------------------------------------------------------------------- */
    psTable->bNonUniqueKey = true;

/* -------------------------------------------------------------------- */
/*      Do we have a next line available?  This only works for          */
/*      ingested tables I believe.                                      */
/* -------------------------------------------------------------------- */
    if( psTable->iLastLine+1 >= psTable->nLineCount )
        return NULL;

psTable->iLastLine++;
    CSLDestroy( psTable->papszRecFields );
    psTable->papszRecFields =
        CSVSplitLine( psTable->papszLines[psTable->iLastLine], ',' );

return psTable->papszRecFields;
}

/************************************************************************/
/*                            CSVScanFile()                             */
/*                                                                      */
/*      Scan a whole file using criteria similar to above, but also     */
/*      taking care of file opening and closing.                        */
/************************************************************************/

char **CSVScanFile( const char * pszFilename, int iKeyField,
                    const char * pszValue, CSVCompareCriteria eCriteria )

{
/* -------------------------------------------------------------------- */
/*      Get access to the table.                                        */
/* -------------------------------------------------------------------- */
    CPLAssert( pszFilename != NULL );

if( iKeyField < 0 )
        return NULL;

CSVTable * const psTable = CSVAccess( pszFilename );
    if( psTable == NULL )
        return NULL;

CSVIngest( pszFilename );

/* -------------------------------------------------------------------- */
/*      Does the current record match the criteria?  If so, just        */
/*      return it again.                                                */
/* -------------------------------------------------------------------- */
    if( iKeyField >= 0
        && iKeyField < CSLCount(psTable->papszRecFields)
        && CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria)
        && !psTable->bNonUniqueKey )
    {
        return psTable->papszRecFields;
    }

/* -------------------------------------------------------------------- */
/*      Scan the file from the beginning, replacing the ``current       */
/*      record'' in our structure with the one that is found.           */
/* -------------------------------------------------------------------- */
    psTable->iLastLine = -1;
    CSLDestroy( psTable->papszRecFields );

if( psTable->pszRawData != NULL )
        psTable->papszRecFields =
            CSVScanLinesIngested( psTable, iKeyField, pszValue, eCriteria );
    else
    {
        VSIRewindL( psTable->fp );
        CPLReadLineL( psTable->fp );         /* throw away the header line */

psTable->papszRecFields =
            CSVScanLinesL( psTable->fp, iKeyField, pszValue, eCriteria );
    }

return psTable->papszRecFields;
}

/************************************************************************/
/*                           CPLGetFieldId()                            */
/*                                                                      */
/*      Read the first record of a CSV file (rewinding to be sure),     */
/*      and find the field with the indicated name.  Returns -1 if      */
/*      it fails to find the field name.  Comparison is case            */
/*      insensitive, but otherwise exact.  After this function has      */
/*      been called the file pointer will be positioned just after      */
/*      the first record.                                               */
/*                                                                      */
/*      Deprecated.  Replaced by CPLGetFieldIdL().                      */
/************************************************************************/

int CSVGetFieldId( FILE * fp, const char * pszFieldName )

{
    CPLAssert( fp != NULL && pszFieldName != NULL );

VSIRewind( fp );

char **papszFields = CSVReadParseLine( fp );
    for( int i = 0; papszFields != NULL && papszFields[i] != NULL; i++ )
    {
        if( EQUAL(papszFields[i], pszFieldName) )
        {
            CSLDestroy( papszFields );
            return i;
        }
    }

CSLDestroy( papszFields );

return -1;
}

/************************************************************************/
/*                           CPLGetFieldIdL()                           */
/*                                                                      */
/*      Read the first record of a CSV file (rewinding to be sure),     */
/*      and find the field with the indicated name.  Returns -1 if      */
/*      it fails to find the field name.  Comparison is case            */
/*      insensitive, but otherwise exact.  After this function has      */
/*      been called the file pointer will be positioned just after      */
/*      the first record.                                               */
/************************************************************************/

int CSVGetFieldIdL( VSILFILE * fp, const char * pszFieldName )

{
    CPLAssert( fp != NULL && pszFieldName != NULL );

VSIRewindL( fp );

char **papszFields = CSVReadParseLineL( fp );
    for( int i = 0; papszFields != NULL && papszFields[i] != NULL; i++ )
    {
        if( EQUAL(papszFields[i], pszFieldName) )
        {
            CSLDestroy( papszFields );
            return i;
        }
    }

CSLDestroy( papszFields );

return -1;
}

/************************************************************************/
/*                         CSVGetFileFieldId()                          */
/*                                                                      */
/*      Same as CPLGetFieldId(), except that we get the file based      */
/*      on filename, rather than having an existing handle.             */
/************************************************************************/

int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName )

CSVTable * const psTable = CSVAccess( pszFilename );
    if( psTable == NULL )
        return -1;

/* -------------------------------------------------------------------- */
/*      Find the requested field.                                       */
/* -------------------------------------------------------------------- */
    for( int i = 0;
         psTable->papszFieldNames != NULL
             && psTable->papszFieldNames[i] != NULL;
         i++ )
    {
        if( EQUAL(psTable->papszFieldNames[i], pszFieldName) )
        {
            return i;
        }
    }

return -1;
}

/************************************************************************/
/*                         CSVScanFileByName()                          */
/*                                                                      */
/*      Same as CSVScanFile(), but using a field name instead of a      */
/*      field number.                                                   */
/************************************************************************/

char **CSVScanFileByName( const char * pszFilename,
                          const char * pszKeyFieldName,
                          const char * pszValue, CSVCompareCriteria eCriteria )

{
    const int iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName );
    if( iKeyField == -1 )
        return NULL;

return CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria );
}

/************************************************************************/
/*                            CSVGetField()                             */
/*                                                                      */
/*      The all-in-one function to fetch a particular field value       */
/*      from a CSV file.  Note this function will return an empty       */
/*      string, rather than NULL if it fails to find the desired        */
/*      value for some reason.  The caller can't establish that the     */
/*      fetch failed.                                                   */
/************************************************************************/

const char *CSVGetField( const char * pszFilename,
                         const char * pszKeyFieldName,
                         const char * pszKeyFieldValue,
                         CSVCompareCriteria eCriteria,
                         const char * pszTargetField )

{
/* -------------------------------------------------------------------- */
/*      Find the table.                                                 */
/* -------------------------------------------------------------------- */
    CSVTable * const psTable = CSVAccess( pszFilename );
    if( psTable == NULL )
        return "";

/* -------------------------------------------------------------------- */
/*      Find the correct record.                                        */
/* -------------------------------------------------------------------- */
    char **papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName,
                                            pszKeyFieldValue, eCriteria );
    if( papszRecord == NULL )
        return "";

/* -------------------------------------------------------------------- */
/*      Figure out which field we want out of this.                     */
/* -------------------------------------------------------------------- */
    const int iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField );
    if( iTargetField < 0 )
        return "";

for( int i=0; papszRecord[i] != NULL; ++i )
    {
        if( i == iTargetField )
            return papszRecord[iTargetField];
    }
    return "";
}

/************************************************************************/
/*                       GDALDefaultCSVFilename()                       */
/************************************************************************/

typedef struct
{
    char szPath[512];
    bool bCSVFinderInitialized;
} DefaultCSVFileNameTLS;

const char * GDALDefaultCSVFilename( const char *pszBasename )

{
/* -------------------------------------------------------------------- */
/*      Do we already have this file accessed?  If so, just return      */
/*      the existing path without any further probing.                  */
/* -------------------------------------------------------------------- */
    int bMemoryError = FALSE;
    CSVTable **ppsCSVTableList = static_cast<CSVTable **>(
      CPLGetTLSEx( CTLS_CSVTABLEPTR, &bMemoryError ) );
    if( ppsCSVTableList != NULL )
    {
        const size_t nBasenameLen = strlen(pszBasename);

for( const CSVTable *psTable = *ppsCSVTableList;
             psTable != NULL;
             psTable = psTable->psNext )
        {
            const size_t nFullLen = strlen(psTable->pszFilename);

if( nFullLen > nBasenameLen
                && strcmp(psTable->pszFilename+nFullLen-nBasenameLen,
                          pszBasename) == 0
                && strchr("/\\", psTable->pszFilename[+nFullLen-nBasenameLen-1])
                          != NULL )
            {
                return psTable->pszFilename;
            }
        }
    }

/* -------------------------------------------------------------------- */
/*      Otherwise we need to look harder for it.                        */
/* -------------------------------------------------------------------- */
    DefaultCSVFileNameTLS* pTLSData =
        static_cast<DefaultCSVFileNameTLS *>(
            CPLGetTLSEx( CTLS_CSVDEFAULTFILENAME, &bMemoryError ) );
    if( pTLSData == NULL && !bMemoryError )
    {
        pTLSData = static_cast<DefaultCSVFileNameTLS *>(
            VSI_CALLOC_VERBOSE( 1, sizeof(DefaultCSVFileNameTLS) ) );
        if( pTLSData )
            CPLSetTLS( CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE );
    }
    if( pTLSData == NULL )
        return "/not_existing_dir/not_existing_path";

const char *pszResult = CPLFindFile( "epsg_csv", pszBasename );

if( pszResult != NULL )
        return pszResult;

if( !pTLSData->bCSVFinderInitialized )
    {
        pTLSData->bCSVFinderInitialized = true;

if( CPLGetConfigOption("GEOTIFF_CSV", NULL) != NULL )
            CPLPushFinderLocation( CPLGetConfigOption("GEOTIFF_CSV", NULL));

if( CPLGetConfigOption("GDAL_DATA", NULL) != NULL )
            CPLPushFinderLocation( CPLGetConfigOption("GDAL_DATA", NULL) );

pszResult = CPLFindFile( "epsg_csv", pszBasename );

if( pszResult != NULL )
            return pszResult;
    }

#ifdef GDAL_NO_HARDCODED_FIND
    // For systems like sandboxes that do not allow other checks.
    CPLDebug( "CPL_CSV",
              "Failed to find file in GDALDefaultCSVFilename.  "
              "Returning original basename: %s",
              pszBasename );
    strcpy( pTLSData->szPath, pszBasename );
    return pTLSData->szPath;
#else

#ifdef GDAL_PREFIX
  #ifdef MACOSX_FRAMEWORK
    strcpy( pTLSData->szPath, GDAL_PREFIX "/Resources/epsg_csv/" );
    CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
  #else
    strcpy( pTLSData->szPath, GDAL_PREFIX "/share/epsg_csv/" );
    CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
  #endif
#else
    strcpy( pTLSData->szPath, "/usr/local/share/epsg_csv/" );
    CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
#endif  // GDAL_PREFIX

VSILFILE *fp = VSIFOpenL( pTLSData->szPath, "rt" );
    if( fp == NULL )
        CPLStrlcpy( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );

if( fp != NULL )
        VSIFCloseL( fp );

return pTLSData->szPath;
#endif  // GDAL_NO_HARDCODED_FIND
}

/************************************************************************/
/*                            CSVFilename()                             */
/*                                                                      */
/*      Return the full path to a particular CSV file.  This will       */
/*      eventually be something the application can override.           */
/************************************************************************/

CPL_C_START
static const char *(*pfnCSVFilenameHook)(const char *) = NULL;
CPL_C_END

const char * CSVFilename( const char *pszBasename )

{
    if( pfnCSVFilenameHook == NULL )
        return GDALDefaultCSVFilename( pszBasename );

return pfnCSVFilenameHook( pszBasename );
}

/************************************************************************/
/*                         SetCSVFilenameHook()                         */
/*                                                                      */
/*      Applications can use this to set a function that will           */
/*      massage CSV filenames.                                          */
/************************************************************************/

/**
 * Override CSV file search method.
 *
 * @param pfnNewHook The pointer to a function which will return the
 * full path for a given filename.
 *

This function allows an application to override how the GTIFGetDefn()
and related function find the CSV (Comma Separated Value) values
required. The pfnHook argument should be a pointer to a function that
will take in a CSV filename and return a full path to the file. The
returned string should be to an internal static buffer so that the
caller doesn't have to free the result.

<b>Example:</b><br>

The listgeo utility uses the following override function if the user
specified a CSV file directory with the -t commandline switch (argument
put into CSVDirName).  <p>

<pre>

...
    SetCSVFilenameHook( CSVFileOverride );
    ...

static const char *CSVFileOverride( const char * pszInput )

{
    static char szPath[1024] = {};

#ifdef WIN32
    sprintf( szPath, "%s\\%s", CSVDirName, pszInput );
#else
    sprintf( szPath, "%s/%s", CSVDirName, pszInput );
#endif

return szPath;
}
</pre>

CPL_C_START
void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) )

{
    pfnCSVFilenameHook = pfnNewHook;
}
CPL_C_END