EVOLUTION-MANAGER
Edit File: mitab_utils.cpp
/********************************************************************** * $Id: mitab_utils.cpp,v 1.26 2011-06-16 15:53:12 fwarmerdam Exp $ * * Name: mitab_utils.cpp * Project: MapInfo TAB Read/Write library * Language: C++ * Purpose: Misc. util. functions for the library * Author: Daniel Morissette, dmorissette@dmsolutions.ca * ********************************************************************** * Copyright (c) 1999-2001, Daniel Morissette * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. ********************************************************************** * * $Log: mitab_utils.cpp,v $ * Revision 1.26 2011-06-16 15:53:12 fwarmerdam * improve TABBasename() for filenames with an embedded dot (gdal #4123) * * Revision 1.25 2010-07-07 19:00:15 aboudreault * Cleanup Win32 Compile Warnings (GDAL bug #2930) * * Revision 1.24 2010-07-05 17:41:07 aboudreault * Fixed TABCleanFieldName() function should allow char '#' in field name (bug 2231) * * Revision 1.23 2010-01-07 20:39:12 aboudreault * Added support to handle duplicate field names, Added validation to check if a field name start with a number (bug 2141) * * Revision 1.22 2008-07-21 16:04:58 dmorissette * Fixed const char * warnings with GCC 4.3 (GDAL ticket #2325) * * Revision 1.21 2006/12/01 16:53:15 dmorissette * Wrapped <mbctype.h> stuff with !defined(WIN32CE) (done by mloskot in OGR) * * Revision 1.20 2005/08/07 21:02:14 fwarmerdam * avoid warnings about testing for characters > 255. * * Revision 1.19 2004/06/30 20:29:04 dmorissette * Fixed refs to old address danmo@videotron.ca * * Revision 1.18 2002/08/28 14:19:22 warmerda * fix TABGetBasename() for mixture of path divider types like 'mi/abc\def.tab' * * Revision 1.17 2001/06/27 19:52:54 warmerda * avoid multi byte support if _WIN32 and unix defined for cygwin support * * Revision 1.16 2001/01/23 21:23:42 daniel * Added projection bounds lookup table, called from TABFile::SetProjInfo() * * Revision 1.15 2001/01/19 06:06:18 daniel * Don't filter chars in TABCleanFieldName() if we're on a DBCS system * * Revision 1.14 2000/09/28 16:39:44 warmerda * avoid warnings for unused, and unitialized variables * * Revision 1.13 2000/09/20 18:35:51 daniel * Fixed TABAdjustFilenameExtension() to also handle basename and path * using TABAdjustCaseSensitiveFilename() * * Revision 1.12 2000/04/18 04:19:22 daniel * Now accept extended chars with accents in TABCleanFieldName() * * Revision 1.11 2000/02/28 17:08:56 daniel * Avoid using isalnum() in TABCleanFieldName * * Revision 1.10 2000/02/18 20:46:35 daniel * Added TABCleanFieldName() * * Revision 1.9 2000/01/15 22:30:45 daniel * Switch to MIT/X-Consortium OpenSource license * * Revision 1.8 2000/01/14 23:46:59 daniel * Added TABEscapeString()/TABUnEscapeString() * * Revision 1.7 1999/12/16 06:10:24 daniel * TABGetBasename(): make sure last '/' of path is removed * * Revision 1.6 1999/12/14 02:08:37 daniel * Added TABGetBasename() + TAB_CSLLoad() * * Revision 1.5 1999/11/08 04:30:59 stephane * Modify TABGenerateArc() * * Revision 1.4 1999/09/29 17:59:21 daniel * Definition for PI was gone on Windows * * Revision 1.3 1999/09/16 02:39:17 daniel * Completed read support for most feature types * * Revision 1.2 1999/07/12 05:44:59 daniel * Added include math.h for VC++ * * Revision 1.1 1999/07/12 04:18:25 daniel * Initial checkin * **********************************************************************/ #include "mitab.h" #include "mitab_utils.h" #include "cpl_conv.h" #include <math.h> /* sin()/cos() */ #include <ctype.h> /* toupper()/tolower() */ #if defined(_WIN32) && !defined(unix) && !defined(WIN32CE) # include <mbctype.h> /* Multibyte chars stuff */ #endif /********************************************************************** * TABGenerateArc() * * Generate the coordinates for an arc and ADD the coordinates to the * geometry object. If the geometry already contains some points then * these won't be lost. * * poLine can be a OGRLineString or one of its derived classes, such as * OGRLinearRing * numPoints is the number of points to generate. * Angles are specified in radians, valid values are in the range [0..2*PI] * * Arcs are always generated counterclockwise, even if StartAngle > EndAngle * * Returns 0 on success, -1 on error. **********************************************************************/ int TABGenerateArc(OGRLineString *poLine, int numPoints, double dCenterX, double dCenterY, double dXRadius, double dYRadius, double dStartAngle, double dEndAngle) { double dX, dY, dAngleStep, dAngle=0.0; int i; // Adjust angles to go counterclockwise if (dEndAngle < dStartAngle) dEndAngle += 2.0*PI; dAngleStep = (dEndAngle-dStartAngle)/(numPoints-1.0); for(i=0; i<numPoints; i++) { dAngle = (dStartAngle + (double)i*dAngleStep); dX = dCenterX + dXRadius*cos(dAngle); dY = dCenterY + dYRadius*sin(dAngle); poLine->addPoint(dX, dY); } // Complete the arc with the last EndAngle, to make sure that // the arc is correcly close. dX = dCenterX + dXRadius*cos(dAngle); dY = dCenterY + dYRadius*sin(dAngle); poLine->addPoint(dX,dY); return 0; } /********************************************************************** * TABCloseRing() * * Check if a ring is closed, and add a point to close it if necessary. * * Returns 0 on success, -1 on error. **********************************************************************/ int TABCloseRing(OGRLineString *poRing) { if ( poRing->getNumPoints() > 0 && !poRing->get_IsClosed() ) { poRing->addPoint(poRing->getX(0), poRing->getY(0)); } return 0; } /********************************************************************** * TABAdjustCaseSensitiveFilename() * * Scan a filename and its path, adjust uppercase/lowercases if * necessary. * * Returns TRUE if file found, or FALSE if it could not be located with * a case-insensitive search. * * This function works on the original buffer and returns a reference to it. * It does nothing on Windows systems where filenames are not case sensitive. **********************************************************************/ GBool TABAdjustCaseSensitiveFilename(char *pszFname) { #ifdef _WIN32 /*----------------------------------------------------------------- * Nothing to do on Windows *----------------------------------------------------------------*/ return TRUE; #else /*----------------------------------------------------------------- * Unix case. *----------------------------------------------------------------*/ VSIStatBuf sStatBuf; char *pszTmpPath = NULL; int nTotalLen, iTmpPtr; GBool bValidPath; /*----------------------------------------------------------------- * First check if the filename is OK as is. *----------------------------------------------------------------*/ if (VSIStat(pszFname, &sStatBuf) == 0) { return TRUE; } /*----------------------------------------------------------------- * OK, file either does not exist or has the wrong cases... we'll * go backwards until we find a portion of the path that is valid. *----------------------------------------------------------------*/ pszTmpPath = CPLStrdup(pszFname); nTotalLen = strlen(pszTmpPath); iTmpPtr = nTotalLen; bValidPath = FALSE; while(iTmpPtr > 0 && !bValidPath) { /*------------------------------------------------------------- * Move back to the previous '/' separator *------------------------------------------------------------*/ pszTmpPath[--iTmpPtr] = '\0'; while( iTmpPtr > 0 && pszTmpPath[iTmpPtr-1] != '/' ) { pszTmpPath[--iTmpPtr] = '\0'; } if (iTmpPtr > 0 && VSIStat(pszTmpPath, &sStatBuf) == 0) bValidPath = TRUE; } CPLAssert(iTmpPtr >= 0); /*----------------------------------------------------------------- * Assume that CWD is valid... so an empty path is a valid path *----------------------------------------------------------------*/ if (iTmpPtr == 0) bValidPath = TRUE; /*----------------------------------------------------------------- * OK, now that we have a valid base, reconstruct the whole path * by scanning all the sub-directories. * If we get to a point where a path component does not exist then * we simply return the rest of the path as is. *----------------------------------------------------------------*/ while(bValidPath && (int)strlen(pszTmpPath) < nTotalLen) { char **papszDir=NULL; int iEntry, iLastPartStart; iLastPartStart = iTmpPtr; papszDir = CPLReadDir(pszTmpPath); /*------------------------------------------------------------- * Add one component to the current path *------------------------------------------------------------*/ pszTmpPath[iTmpPtr] = pszFname[iTmpPtr]; iTmpPtr++; for( ; pszFname[iTmpPtr] != '\0' && pszFname[iTmpPtr]!='/'; iTmpPtr++) { pszTmpPath[iTmpPtr] = pszFname[iTmpPtr]; } while(iLastPartStart < iTmpPtr && pszTmpPath[iLastPartStart] == '/') iLastPartStart++; /*------------------------------------------------------------- * And do a case insensitive search in the current dir... *------------------------------------------------------------*/ for(iEntry=0; papszDir && papszDir[iEntry]; iEntry++) { if (EQUAL(pszTmpPath+iLastPartStart, papszDir[iEntry])) { /* Fount it! */ strcpy(pszTmpPath+iLastPartStart, papszDir[iEntry]); break; } } if (iTmpPtr > 0 && VSIStat(pszTmpPath, &sStatBuf) != 0) bValidPath = FALSE; CSLDestroy(papszDir); } /*----------------------------------------------------------------- * We reached the last valid path component... just copy the rest * of the path as is. *----------------------------------------------------------------*/ if (iTmpPtr < nTotalLen-1) { strncpy(pszTmpPath+iTmpPtr, pszFname+iTmpPtr, nTotalLen-iTmpPtr); } /*----------------------------------------------------------------- * Update the source buffer and return. *----------------------------------------------------------------*/ strcpy(pszFname, pszTmpPath); CPLFree(pszTmpPath); return bValidPath; #endif } /********************************************************************** * TABAdjustFilenameExtension() * * Because Unix filenames are case sensitive and MapInfo datasets often have * mixed cases filenames, we use this function to find the right filename * to use ot open a specific file. * * This function works directly on the source string, so the filename it * contains at the end of the call is the one that should be used. * * Returns TRUE if one of the extensions worked, and FALSE otherwise. * If none of the extensions worked then the original extension will NOT be * restored. **********************************************************************/ GBool TABAdjustFilenameExtension(char *pszFname) { VSIStatBuf sStatBuf; int i; /*----------------------------------------------------------------- * First try using filename as provided *----------------------------------------------------------------*/ if (VSIStat(pszFname, &sStatBuf) == 0) { return TRUE; } /*----------------------------------------------------------------- * Try using uppercase extension (we assume that fname contains a '.') *----------------------------------------------------------------*/ for(i = strlen(pszFname)-1; i >= 0 && pszFname[i] != '.'; i--) { pszFname[i] = (char)toupper(pszFname[i]); } if (VSIStat(pszFname, &sStatBuf) == 0) { return TRUE; } /*----------------------------------------------------------------- * Try using lowercase extension *----------------------------------------------------------------*/ for(i = strlen(pszFname)-1; i >= 0 && pszFname[i] != '.'; i--) { pszFname[i] = (char)tolower(pszFname[i]); } if (VSIStat(pszFname, &sStatBuf) == 0) { return TRUE; } /*----------------------------------------------------------------- * None of the extensions worked! * Try adjusting cases in the whole path and filename *----------------------------------------------------------------*/ return TABAdjustCaseSensitiveFilename(pszFname); } /********************************************************************** * TABGetBasename() * * Extract the basename part of a complete file path. * * Returns a newly allocated string without the leading path (dirs) and * the extenstion. The returned string should be freed using CPLFree(). **********************************************************************/ char *TABGetBasename(const char *pszFname) { const char *pszTmp = NULL; /*----------------------------------------------------------------- * Skip leading path or use whole name if no path dividers are * encountered. *----------------------------------------------------------------*/ pszTmp = pszFname + strlen(pszFname) - 1; while ( pszTmp != pszFname && *pszTmp != '/' && *pszTmp != '\\' ) pszTmp--; if( pszTmp != pszFname ) pszTmp++; /*----------------------------------------------------------------- * Now allocate our own copy and remove extension *----------------------------------------------------------------*/ char *pszBasename = CPLStrdup(pszTmp); int i; for(i=strlen(pszBasename)-1; i >= 0; i-- ) { if (pszBasename[i] == '.') { pszBasename[i] = '\0'; break; } } return pszBasename; } /********************************************************************** * TAB_CSLLoad() * * Same as CSLLoad(), but does not produce an error if it fails... it * just returns NULL silently instead. * * Load a test file into a stringlist. * * Lines are limited in length by the size fo the CPLReadLine() buffer. **********************************************************************/ char **TAB_CSLLoad(const char *pszFname) { FILE *fp; const char *pszLine; char **papszStrList=NULL; fp = VSIFOpen(pszFname, "rt"); if (fp) { while(!VSIFEof(fp)) { if ( (pszLine = CPLReadLine(fp)) != NULL ) { papszStrList = CSLAddString(papszStrList, pszLine); } } VSIFClose(fp); } return papszStrList; } /********************************************************************** * TABUnEscapeString() * * Convert a string that can possibly contain escaped "\n" chars in * into into a new one with binary newlines in it. * * Tries to work on hte original buffer unless bSrcIsConst=TRUE, in * which case the original is always untouched and a copy is allocated * ONLY IF NECESSARY. This means that the caller should compare the * return value and the source (pszString) to see if a copy was returned, * in which case the caller becomes responsible of freeing both the * source and the copy. **********************************************************************/ char *TABUnEscapeString(char *pszString, GBool bSrcIsConst) { /*----------------------------------------------------------------- * First check if we need to do any replacement *----------------------------------------------------------------*/ if (pszString == NULL || strstr(pszString, "\\n") == NULL) { return pszString; } /*----------------------------------------------------------------- * Yes, we need to replace at least one "\n" * We try to work on the original buffer unless we have bSrcIsConst=TRUE * * Note that we do not worry about freeing the source buffer when we * return a copy... it is up to the caller to decide if he needs to * free the source based on context and by comparing pszString with * the returned pointer (pszWorkString) to see if they are identical. *----------------------------------------------------------------*/ char *pszWorkString = NULL; int i =0; int j =0; if (bSrcIsConst) { // We have to create a copy to work on. pszWorkString = (char *)CPLMalloc(sizeof(char) * (strlen(pszString) +1)); } else { // We'll work on the original. pszWorkString = pszString; } while (pszString[i]) { if (pszString[i] =='\\' && pszString[i+1] == 'n') { pszWorkString[j++] = '\n'; i+= 2; } else if (pszString[i] =='\\' && pszString[i+1] == '\\') { pszWorkString[j++] = '\\'; i+= 2; } else { pszWorkString[j++] = pszString[i++]; } } pszWorkString[j++] = '\0'; return pszWorkString; } /********************************************************************** * TABEscapeString() * * Convert a string that can possibly contain binary "\n" chars in * into into a new one with escaped newlines ("\\" + "n") in it. * * The function returns the original string pointer if it did not need to * be modified, or a copy that has to be freed by the caller if the * string had to be modified. * * It is up to the caller to decide if he needs to free the returned * string by comparing the source (pszString) pointer with the returned * pointer (pszWorkString) to see if they are identical. **********************************************************************/ char *TABEscapeString(char *pszString) { /*----------------------------------------------------------------- * First check if we need to do any replacement *----------------------------------------------------------------*/ if (pszString == NULL || strchr(pszString, '\n') == NULL) { return pszString; } /*----------------------------------------------------------------- * OK, we need to do some replacements... alloc a copy big enough * to hold the worst possible case *----------------------------------------------------------------*/ char *pszWorkString = (char *)CPLMalloc(2*sizeof(char) * (strlen(pszString) +1)); int i =0; int j =0; while (pszString[i]) { if (pszString[i] =='\n') { pszWorkString[j++] = '\\'; pszWorkString[j++] = 'n'; i++; } else if (pszString[i] =='\\') { pszWorkString[j++] = '\\'; pszWorkString[j++] = '\\'; i++; } else { pszWorkString[j++] = pszString[i++]; } } pszWorkString[j++] = '\0'; return pszWorkString; } /********************************************************************** * TABCleanFieldName() * * Return a copy of pszSrcName that contains only valid characters for a * TAB field name. All invalid characters are replaced by '_'. * * The returned string should be freed by the caller. **********************************************************************/ char *TABCleanFieldName(const char *pszSrcName) { char *pszNewName; int numInvalidChars = 0; pszNewName = CPLStrdup(pszSrcName); if (strlen(pszNewName) > 31) { pszNewName[31] = '\0'; CPLError(CE_Warning, TAB_WarningInvalidFieldName, "Field name '%s' is longer than the max of 31 characters. " "'%s' will be used instead.", pszSrcName, pszNewName); } #if defined(_WIN32) && !defined(unix) && !defined(WIN32CE) /*----------------------------------------------------------------- * On Windows, check if we're using a double-byte codepage, and * if so then just keep the field name as is... *----------------------------------------------------------------*/ if (_getmbcp() != 0) return pszNewName; #endif /*----------------------------------------------------------------- * According to the MapInfo User's Guide (p. 240, v5.5) * New Table Command: * Name: * Displays the field name in the name box. You can also enter new field * names here. Defaults are Field1, Field2, etc. A field name can contain * up to 31 alphanumeric characters. Use letters, numbers, and the * underscore. Do not use spaces; instead, use the underscore character * (_) to separate words in a field name. Use upper and lower case for * legibility, but MapInfo is not case-sensitive. * * It was also verified that extended chars with accents are also * accepted. *----------------------------------------------------------------*/ for(int i=0; pszSrcName && pszSrcName[i] != '\0'; i++) { if ( pszSrcName[i]=='#' ) { if (i == 0) { pszNewName[i] = '_'; numInvalidChars++; } } else if ( !( pszSrcName[i] == '_' || (i!=0 && pszSrcName[i]>='0' && pszSrcName[i]<='9') || (pszSrcName[i]>='a' && pszSrcName[i]<='z') || (pszSrcName[i]>='A' && pszSrcName[i]<='Z') || (GByte)pszSrcName[i]>=192 ) ) { pszNewName[i] = '_'; numInvalidChars++; } } if (numInvalidChars > 0) { CPLError(CE_Warning, TAB_WarningInvalidFieldName, "Field name '%s' contains invalid characters. " "'%s' will be used instead.", pszSrcName, pszNewName); } return pszNewName; } /********************************************************************** * MapInfo Units string to numeric ID conversion **********************************************************************/ typedef struct { int nUnitId; const char *pszAbbrev; } MapInfoUnitsInfo; static MapInfoUnitsInfo gasUnitsList[] = { {0, "mi"}, {1, "km"}, {2, "in"}, {3, "ft"}, {4, "yd"}, {5, "mm"}, {6, "cm"}, {7, "m"}, {8, "survey ft"}, {9, "nmi"}, {30, "li"}, {31, "ch"}, {32, "rd"}, {-1, NULL} }; /********************************************************************** * TABUnitIdToString() * * Return the MIF units name for specified units id. * Return "" if no match found. * * The returned string should not be freed by the caller. **********************************************************************/ const char *TABUnitIdToString(int nId) { MapInfoUnitsInfo *psList; psList = gasUnitsList; while(psList->nUnitId != -1) { if (psList->nUnitId == nId) return psList->pszAbbrev; psList++; } return ""; } /********************************************************************** * TABUnitIdFromString() * * Return the units ID for specified MIF units name * * Returns -1 if no match found. **********************************************************************/ int TABUnitIdFromString(const char *pszName) { MapInfoUnitsInfo *psList; psList = gasUnitsList; while(psList->nUnitId != -1) { if (EQUAL(psList->pszAbbrev, pszName)) return psList->nUnitId; psList++; } return -1; }