EVOLUTION-MANAGER
Edit File: cpl_xml_validate.cpp
/******************************************************************************
 *
 * Project:  CPL - Common Portability Library
 * Purpose:  Implement XML validation against XSD schema
 * Author:   Even Rouault, even.rouault at mines-paris.org
 *
 ******************************************************************************
 * Copyright (c) 2012-2014, Even Rouault <even dot rouault at mines-paris dot org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 ****************************************************************************/

#include "cpl_port.h"
#include "cpl_conv.h"
#include "cpl_error.h"

CPL_CVSID("$Id: cpl_xml_validate.cpp 0f654dda9faabf9d86a44293f0f89903a8e97dd7 2018-04-15 20:18:32 +0200 Even Rouault $")

#ifdef HAVE_LIBXML2
#include <libxml/xmlversion.h>
#if defined(LIBXML_VERSION) && LIBXML_VERSION >= 20622
// We need at least 2.6.20 for xmlSchemaValidateDoc
// and xmlParseDoc to accept a const xmlChar*
// We could workaround it, but likely not worth the effort for now.
// Actually, we need at least 2.6.22, at runtime, to be
// able to parse the OGC GML schemas
#define HAVE_RECENT_LIBXML2

// libxml2 before 2.8.0 had a bug to parse the OGC GML schemas
// We have a workaround for that for versions >= 2.6.20 and < 2.8.0.
#if defined(LIBXML_VERSION) && LIBXML_VERSION < 20800
#define HAS_VALIDATION_BUG
#endif

#else
#warning "Not recent enough libxml2 version"
#endif
#endif

#ifdef HAVE_RECENT_LIBXML2
#include <string.h>

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunknown-pragmas"
#pragma clang diagnostic ignored "-Wdocumentation"
#endif

#include <libxml/xmlschemas.h>
#include <libxml/parserInternals.h>
#include <libxml/catalog.h>

#ifdef __clang__
#pragma clang diagnostic pop
#endif

#include "cpl_string.h"
#include "cpl_hash_set.h"
#include "cpl_minixml.h"

// Entity loader that was installed in libxml2 before ours; our loader
// delegates to it for the URLs it does not handle itself.
static xmlExternalEntityLoader pfnLibXMLOldExtranerEntityLoader = nullptr;

/************************************************************************/
/*                            CPLFixPath()                              */
/************************************************************************/

// Replace \ by / to make libxml2 happy on Windows and
// replace "a/b/../c" pattern by "a/c".
//
// The string is edited in place. Simplification stops (leaving the rest of
// the path untouched) as soon as a "/../" is found at the very start of the
// string, or when the component preceding it starts at the first character
// of the string (e.g. "b/../c" and "/b/../c" are left as they are).
static void CPLFixPath(char* pszPath)
{
    for( int i = 0; pszPath[i] != '\0'; ++i )
    {
        if( pszPath[i] == '\\' )
            pszPath[i] = '/';
    }

    while( true )
    {
        char* pszSlashDotDot = strstr(pszPath, "/../");
        if( pszSlashDotDot == nullptr || pszSlashDotDot == pszPath )
            return;
        // Walk back to the '/' that opens the component to drop.
        char* pszSlashBefore = pszSlashDotDot - 1;
        while( pszSlashBefore > pszPath && *pszSlashBefore != '/' )
            pszSlashBefore--;
        if( pszSlashBefore == pszPath )
            return;
        // Shift the tail (including the terminating nul) over "b/../".
        memmove(pszSlashBefore + 1, pszSlashDotDot + 4,
                strlen(pszSlashDotDot + 4) + 1);
    }
}

#ifdef HAS_VALIDATION_BUG

/************************************************************************/
/*                  CPLHasLibXMLBugWarningCallback()                    */
/************************************************************************/

// No-op handler: installed as both error and warning callback while probing
// for the libxml2 bug, so that the probe emits nothing.
static void CPLHasLibXMLBugWarningCallback ( void * /*ctx*/,
                                             const char* /*msg*/, ... )
{}

/************************************************************************/
/*                          CPLHasLibXMLBug()                           */
/************************************************************************/

// Return true if xmlSchemaParse() fails on a small in-memory test schema.
// The test is run on the first call only; later calls return the cached
// result.
static bool CPLHasLibXMLBug()
{
    static bool bHasLibXMLBug = false;
    static bool bLibXMLBugChecked = false;
    if( bLibXMLBugChecked )
        return bHasLibXMLBug;

    constexpr char szLibXMLBugTester[] =
        "<schema targetNamespace=\"http://foo\" "
        "xmlns:foo=\"http://foo\" xmlns=\"http://www.w3.org/2001/XMLSchema\">"
        "<simpleType name=\"t1\">"
        "<list itemType=\"double\"/>"
        "</simpleType>"
        "<complexType name=\"t2\">"
        "<simpleContent>"
        "<extension base=\"foo:t1\"/>"
        "</simpleContent>"
        "</complexType>"
        "<complexType name=\"t3\">"
        "<simpleContent>"
        "<restriction base=\"foo:t2\">"
        "<length value=\"2\"/>"
        "</restriction>"
        "</simpleContent>"
        "</complexType>"
        "</schema>";

    xmlSchemaParserCtxtPtr pSchemaParserCtxt =
        xmlSchemaNewMemParserCtxt(szLibXMLBugTester,
                                  strlen(szLibXMLBugTester));

    xmlSchemaSetParserErrors(pSchemaParserCtxt,
                             CPLHasLibXMLBugWarningCallback,
                             CPLHasLibXMLBugWarningCallback,
                             nullptr);

    xmlSchemaPtr pSchema = xmlSchemaParse(pSchemaParserCtxt);
    xmlSchemaFreeParserCtxt(pSchemaParserCtxt);

    // A parse failure on the test schema is taken as the bug being present.
    bHasLibXMLBug = pSchema == nullptr;
    bLibXMLBugChecked = true;

    if( pSchema )
        xmlSchemaFree(pSchema);

    if( bHasLibXMLBug )
    {
        CPLDebug(
            "CPL",
            "LibXML bug found "
            "(cf https://bugzilla.gnome.org/show_bug.cgi?id=630130). "
            "Will try to workaround for GML schemas." );
    }

    return bHasLibXMLBug;
}

#endif

/************************************************************************/
/*                         CPLExtractSubSchema()                        */
/************************************************************************/

// Strip the wrapping of an included schema tree so that only its content
// elements remain.
//
// A leading "?xml" node and then a leading comment node are destroyed. If
// what remains is a single schema / xs:schema / xsd:schema element, its
// "xmlns:*" attributes that psMainSchema does not already have are copied
// to the front of psMainSchema's children, the schema element itself is
// destroyed, and the chain starting at its first element child is returned.
// Otherwise the (possibly shortened) input chain is returned as is, which
// may be nullptr.
//
// psSubXML must not be nullptr on entry (it is dereferenced unchecked).
static CPLXMLNode* CPLExtractSubSchema( CPLXMLNode* psSubXML,
                                        CPLXMLNode* psMainSchema )
{
    if( psSubXML->eType == CXT_Element &&
        strcmp(psSubXML->pszValue, "?xml") == 0 )
    {
        CPLXMLNode* psNext = psSubXML->psNext;
        psSubXML->psNext = nullptr;
        CPLDestroyXMLNode(psSubXML);
        psSubXML = psNext;
    }

    if( psSubXML != nullptr && psSubXML->eType == CXT_Comment )
    {
        CPLXMLNode* psNext = psSubXML->psNext;
        psSubXML->psNext = nullptr;
        CPLDestroyXMLNode(psSubXML);
        psSubXML = psNext;
    }

    if( psSubXML != nullptr && psSubXML->eType == CXT_Element &&
        (strcmp(psSubXML->pszValue, "schema") == 0 ||
         strcmp(psSubXML->pszValue, "xs:schema") == 0 ||
         strcmp(psSubXML->pszValue, "xsd:schema") == 0) &&
        psSubXML->psNext == nullptr )
    {
        CPLXMLNode* psNext = psSubXML->psChild;
        // NOTE(review): the loop stops ON the last non-element child (the
        // one directly followed by an element), so that child is never
        // examined for "xmlns:" - confirm whether this is intended.
        while( psNext != nullptr && psNext->eType != CXT_Element &&
               psNext->psNext != nullptr &&
               psNext->psNext->eType != CXT_Element )
        {
            // Add xmlns: from subschema to main schema if missing.
            if( psNext->eType == CXT_Attribute &&
                STARTS_WITH(psNext->pszValue, "xmlns:") &&
                CPLGetXMLValue(psMainSchema, psNext->pszValue, nullptr) == nullptr )
            {
                CPLXMLNode* psAttr =
                    CPLCreateXMLNode(nullptr, CXT_Attribute, psNext->pszValue);
                CPLCreateXMLNode(psAttr, CXT_Text, psNext->psChild->pszValue);

                psAttr->psNext = psMainSchema->psChild;
                psMainSchema->psChild = psAttr;
            }
            psNext = psNext->psNext;
        }

        // Cut the chain between the last non-element child and the first
        // element, destroy the schema node (with the children still attached
        // to it), and keep the detached elements.
        if( psNext != nullptr && psNext->eType != CXT_Element &&
            psNext->psNext != nullptr &&
            psNext->psNext->eType == CXT_Element )
        {
            CPLXMLNode* psNext2 = psNext->psNext;
            psNext->psNext = nullptr;
            CPLDestroyXMLNode(psSubXML);
            psSubXML = psNext2;
        }
    }

    return psSubXML;
}

#ifdef HAS_VALIDATION_BUG

/************************************************************************/
/*                        CPLWorkaroundLibXMLBug()                      */
/************************************************************************/

// Return TRUE if the current node must be destroyed.
//
// Rewrites, in place, a handful of schema declarations recognised by their
// element kind and "name" attribute:
//  - element QuantityExtent / CategoryExtent: the "type" attribute value is
//    replaced by a list type (the *NilReason* variant when substitutionGroup
//    is gml:AbstractValue, the *Null* variant otherwise);
//  - complexType QuantityExtentType / CategoryExtentType: reported for
//    destruction when CPLHasLibXMLBug() is true;
//  - complexType VectorType: children replaced by a simpleContent extension
//    of gml:doubleList with the gml:SRSReferenceGroup attribute group;
//  - element domainOfValidity / coordinateOperationAccuracy /
//    formulaCitation: children replaced by a complexType holding an optional
//    sequence of a single optional, lax "any".
static bool CPLWorkaroundLibXMLBug( CPLXMLNode* psIter )
{
    if( psIter->eType == CXT_Element &&
        strcmp(psIter->pszValue, "element") == 0 &&
        strcmp(CPLGetXMLValue(psIter, "name", ""), "QuantityExtent") == 0 &&
        strcmp(CPLGetXMLValue(psIter, "type", ""),
               "gml:QuantityExtentType") == 0 )
    {
        CPLXMLNode* psIter2 = psIter->psChild;
        while( psIter2 )
        {
            if( psIter2->eType == CXT_Attribute &&
                strcmp(psIter2->pszValue, "type") == 0 )
            {
                CPLFree(psIter2->psChild->pszValue);
                if( strcmp(CPLGetXMLValue(psIter, "substitutionGroup", ""),
                           "gml:AbstractValue") == 0 )
                    // GML 3.2.1.
                    psIter2->psChild->pszValue =
                        CPLStrdup("gml:MeasureOrNilReasonListType");
                else
                    psIter2->psChild->pszValue =
                        CPLStrdup("gml:MeasureOrNullListType");
            }
            psIter2 = psIter2->psNext;
        }
    }

    else if( psIter->eType == CXT_Element &&
             strcmp(psIter->pszValue, "element") == 0 &&
             strcmp(CPLGetXMLValue(psIter, "name", ""),
                    "CategoryExtent") == 0 &&
             strcmp(CPLGetXMLValue(psIter, "type", ""),
                    "gml:CategoryExtentType") == 0 )
    {
        CPLXMLNode* psIter2 = psIter->psChild;
        while( psIter2 )
        {
            if( psIter2->eType == CXT_Attribute &&
                strcmp(psIter2->pszValue, "type") == 0 )
            {
                CPLFree(psIter2->psChild->pszValue);
                if( strcmp(CPLGetXMLValue(psIter, "substitutionGroup", ""),
                           "gml:AbstractValue") == 0 )
                    // GML 3.2.1
                    psIter2->psChild->pszValue =
                        CPLStrdup("gml:CodeOrNilReasonListType");
                else
                    psIter2->psChild->pszValue =
                        CPLStrdup("gml:CodeOrNullListType");
            }
            psIter2 = psIter2->psNext;
        }
    }

    else if( CPLHasLibXMLBug() && psIter->eType == CXT_Element &&
             strcmp(psIter->pszValue, "complexType") == 0 &&
             (strcmp(CPLGetXMLValue(psIter, "name", ""),
                     "QuantityExtentType") == 0 ||
              strcmp(CPLGetXMLValue(psIter, "name", ""),
                     "CategoryExtentType") == 0) )
    {
        // Destroy this element.
        return true;
    }

    // For GML 3.2.1
    else if( psIter->eType == CXT_Element &&
             strcmp(psIter->pszValue, "complexType") == 0 &&
             strcmp(CPLGetXMLValue(psIter, "name", ""), "VectorType") == 0 )
    {
        CPLXMLNode* psSimpleContent =
            CPLCreateXMLNode(nullptr, CXT_Element, "simpleContent");
        CPLXMLNode* psExtension =
            CPLCreateXMLNode(psSimpleContent, CXT_Element, "extension");
        CPLXMLNode* psExtensionBase =
            CPLCreateXMLNode(psExtension, CXT_Attribute, "base");
        CPLCreateXMLNode(psExtensionBase, CXT_Text, "gml:doubleList");
        CPLXMLNode* psAttributeGroup =
            CPLCreateXMLNode(psExtension, CXT_Element, "attributeGroup");
        CPLXMLNode* psAttributeGroupRef =
            CPLCreateXMLNode(psAttributeGroup, CXT_Attribute, "ref");
        CPLCreateXMLNode(psAttributeGroupRef, CXT_Text,
                         "gml:SRSReferenceGroup");

        CPLXMLNode* psName = CPLCreateXMLNode(nullptr, CXT_Attribute, "name");
        CPLCreateXMLNode(psName, CXT_Text, "VectorType");

        // Replace all previous children by: name attribute + simpleContent.
        CPLDestroyXMLNode(psIter->psChild);
        psIter->psChild = psName;
        psIter->psChild->psNext = psSimpleContent;
    }

    else if( psIter->eType == CXT_Element &&
             strcmp(psIter->pszValue, "element") == 0 &&
             (strcmp(CPLGetXMLValue(psIter, "name", ""),
                     "domainOfValidity") == 0 ||
              strcmp(CPLGetXMLValue(psIter, "name", ""),
                     "coordinateOperationAccuracy") == 0 ||
              strcmp(CPLGetXMLValue(psIter, "name", ""),
                     "formulaCitation") == 0) )
    {
        CPLXMLNode* psComplexType =
            CPLCreateXMLNode(nullptr, CXT_Element, "complexType");
        CPLXMLNode* psSequence =
            CPLCreateXMLNode(psComplexType, CXT_Element, "sequence");
        CPLXMLNode* psSequenceMinOccurs =
            CPLCreateXMLNode(psSequence, CXT_Attribute, "minOccurs");
        CPLCreateXMLNode(psSequenceMinOccurs, CXT_Text, "0");
        CPLXMLNode* psAny = CPLCreateXMLNode(psSequence, CXT_Element, "any");
        CPLXMLNode* psAnyMinOccurs =
            CPLCreateXMLNode(psAny, CXT_Attribute, "minOccurs");
        CPLCreateXMLNode(psAnyMinOccurs, CXT_Text, "0");
        // NOTE(review): the attribute name below starts with a space;
        // confirm whether that is deliberate.
        CPLXMLNode* psAnyProcessContents =
            CPLCreateXMLNode(psAny, CXT_Attribute, " processContents");
        CPLCreateXMLNode(psAnyProcessContents, CXT_Text, "lax");

        // The new name node must be built before the old children (which
        // hold the value returned by CPLGetXMLValue) are destroyed.
        CPLXMLNode* psName = CPLCreateXMLNode(nullptr, CXT_Attribute, "name");
        CPLCreateXMLNode(psName, CXT_Text,
                         CPLGetXMLValue(psIter, "name", ""));

        CPLDestroyXMLNode(psIter->psChild);
        psIter->psChild = psName;
        psIter->psChild->psNext = psComplexType;
    }

    return false;
}
#endif

/************************************************************************/
/*                       CPLLoadSchemaStrInternal()                     */
/************************************************************************/

// Parse the schema file pszFile and return its XML tree with the referenced
// <include> schemas merged in.
//
// hSetSchemas records the filenames already loaded; pszFile is added to it.
// For each top-level child of the schema node:
//  - an include / xs:include / xsd:include whose FIRST child is a
//    schemaLocation attribute is resolved relative to the directory of
//    pszFile, loaded recursively, and its content spliced in place of the
//    include node (or the include node is simply dropped when that file is
//    already in hSetSchemas);
//  - an import / xs:import / xsd:import has its schemaLocation rewritten to a
//    path relative to the directory of pszFile, unless the location starts
//    with "http://" or "ftp://" or pszFile contains
//    "/vsimem/CPLValidateXML_".
//
// Returns nullptr when pszFile is already in hSetSchemas, cannot be parsed,
// has no schema root, or when an included schema fails to load.
static CPLXMLNode* CPLLoadSchemaStrInternal( CPLHashSet* hSetSchemas,
                                             const char* pszFile )
{
    if( CPLHashSetLookup(hSetSchemas, pszFile) )
        return nullptr;

    CPLHashSetInsert(hSetSchemas, CPLStrdup(pszFile));

    CPLDebug("CPL", "Parsing %s", pszFile);

    CPLXMLNode* psXML = CPLParseXMLFile(pszFile);
    if( psXML == nullptr )
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Cannot open %s", pszFile);
        return nullptr;
    }

    CPLXMLNode* psSchema = CPLGetXMLNode(psXML, "=schema");
    if( psSchema == nullptr )
    {
        psSchema = CPLGetXMLNode(psXML, "=xs:schema");
    }
    if( psSchema == nullptr )
    {
        psSchema = CPLGetXMLNode(psXML, "=xsd:schema");
    }
    if( psSchema == nullptr )
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Cannot find schema node in %s", pszFile);
        CPLDestroyXMLNode(psXML);
        return nullptr;
    }

    // psPrev trails psIter so that nodes can be unlinked from the
    // singly-linked child list.
    CPLXMLNode* psPrev = nullptr;
    CPLXMLNode* psIter = psSchema->psChild;
    while( psIter )
    {
        bool bDestroyCurrentNode = false;

#ifdef HAS_VALIDATION_BUG
        if( CPLHasLibXMLBug() )
            bDestroyCurrentNode = CPLWorkaroundLibXMLBug(psIter);
#endif

        // Load the referenced schemas, and integrate them in the main schema.
        if( psIter->eType == CXT_Element &&
            (strcmp(psIter->pszValue, "include") == 0 ||
             strcmp(psIter->pszValue, "xs:include") == 0||
             strcmp(psIter->pszValue, "xsd:include") == 0) &&
            psIter->psChild != nullptr &&
            psIter->psChild->eType == CXT_Attribute &&
            strcmp(psIter->psChild->pszValue, "schemaLocation") == 0 )
        {
            const char* pszIncludeSchema = psIter->psChild->psChild->pszValue;
            char* pszFullFilename = CPLStrdup(
                CPLFormFilename(CPLGetPath(pszFile), pszIncludeSchema, nullptr) );

            CPLFixPath(pszFullFilename);

            CPLXMLNode* psSubXML = nullptr;

            // If we haven't yet loaded that schema, do it now.
            if( !CPLHashSetLookup(hSetSchemas, pszFullFilename) )
            {
                psSubXML =
                    CPLLoadSchemaStrInternal(hSetSchemas, pszFullFilename);
                if( psSubXML == nullptr )
                {
                    CPLFree(pszFullFilename);
                    CPLDestroyXMLNode(psXML);
                    return nullptr;
                }
            }
            CPLFree(pszFullFilename);
            pszFullFilename = nullptr;

            if( psSubXML )
            {
                CPLXMLNode* psNext = psIter->psNext;

                psSubXML = CPLExtractSubSchema(psSubXML, psSchema);
                if( psSubXML == nullptr )
                {
                    CPLDestroyXMLNode(psXML);
                    return nullptr;
                }

                // Replace <include/> node by the subXML.
                CPLXMLNode* psIter2 = psSubXML;
                while( psIter2->psNext )
                    psIter2 = psIter2->psNext;
                psIter2->psNext = psNext;

                if( psPrev == nullptr )
                    psSchema->psChild = psSubXML;
                else
                    psPrev->psNext = psSubXML;

                psIter->psNext = nullptr;
                CPLDestroyXMLNode(psIter);

                // Resume after the spliced-in nodes; they are not revisited.
                psPrev = psIter2;
                psIter = psNext;
                continue;
            }
            else
            {
                // We have already included that file,
                // so just remove the <include/> node
                bDestroyCurrentNode = true;
            }
        }

        // Patch the schemaLocation of <import/>.
        else if( psIter->eType == CXT_Element &&
                 (strcmp(psIter->pszValue, "import") == 0 ||
                  strcmp(psIter->pszValue, "xs:import") == 0||
                  strcmp(psIter->pszValue, "xsd:import") == 0) )
        {
            CPLXMLNode* psIter2 = psIter->psChild;
            while( psIter2 )
            {
                if( psIter2->eType == CXT_Attribute &&
                    strcmp(psIter2->pszValue, "schemaLocation") == 0 &&
                    psIter2->psChild != nullptr &&
                    !STARTS_WITH(psIter2->psChild->pszValue, "http://") &&
                    !STARTS_WITH(psIter2->psChild->pszValue, "ftp://") &&
                    // If the top file is our warping file, don't alter the path
                    // of the import.
                    strstr(pszFile, "/vsimem/CPLValidateXML_") == nullptr )
                {
                    char* pszFullFilename =
                        CPLStrdup(CPLFormFilename(
                            CPLGetPath(pszFile),
                            psIter2->psChild->pszValue, nullptr ));
                    CPLFixPath(pszFullFilename);
                    CPLFree(psIter2->psChild->pszValue);
                    psIter2->psChild->pszValue = pszFullFilename;
                }
                psIter2 = psIter2->psNext;
            }
        }

        if( bDestroyCurrentNode )
        {
            // Unlink psIter from the child list, then destroy it alone.
            CPLXMLNode* psNext = psIter->psNext;
            if( psPrev == nullptr )
                psSchema->psChild = psNext;
            else
                psPrev->psNext = psNext;

            psIter->psNext = nullptr;
            CPLDestroyXMLNode(psIter);

            psIter = psNext;
            continue;
        }

        psPrev = psIter;
        psIter = psIter->psNext;
    }

    return psXML;
}

/************************************************************************/
/*                       CPLMoveImportAtBeginning()                     */
/************************************************************************/

// Move every import / xs:import / xsd:import child of the schema node that
// is not already its first child to the head of the child list. Each moved
// node is inserted at the head, so the moved imports end up in the reverse
// of their original relative order. Does nothing if no schema node is found.
static void CPLMoveImportAtBeginning( CPLXMLNode* psXML )
{
    CPLXMLNode* psSchema = CPLGetXMLNode(psXML, "=schema");
    if( psSchema == nullptr )
        psSchema = CPLGetXMLNode(psXML, "=xs:schema");
    if( psSchema == nullptr )
        psSchema = CPLGetXMLNode(psXML, "=xsd:schema");
    if( psSchema == nullptr )
        return;

    CPLXMLNode* psPrev = nullptr;
    CPLXMLNode* psIter = psSchema->psChild;
    while( psIter )
    {
        if( psPrev != nullptr && psIter->eType == CXT_Element &&
            (strcmp(psIter->pszValue, "import") == 0 ||
             strcmp(psIter->pszValue, "xs:import") == 0 ||
             strcmp(psIter->pszValue, "xsd:import") == 0) )
        {
            // Reorder at the beginning.
            CPLXMLNode* psNext = psIter->psNext;

            psPrev->psNext = psNext;

            CPLXMLNode* psFirstChild = psSchema->psChild;
            psSchema->psChild = psIter;
            psIter->psNext = psFirstChild;

            // psPrev is unchanged: it now directly precedes psNext.
            psIter = psNext;
            continue;
        }

        psPrev = psIter;
        psIter = psIter->psNext;
    }
}

/************************************************************************/
/*                           CPLLoadSchemaStr()                         */
/************************************************************************/

// Load pszXSDFilename with its includes merged (CPLLoadSchemaStrInternal),
// move its imports first, and return the tree serialized by
// CPLSerializeXMLTree(). Returns nullptr if the schema could not be loaded.
static char* CPLLoadSchemaStr( const char* pszXSDFilename )
{
#ifdef HAS_VALIDATION_BUG
    CPLHasLibXMLBug();
#endif

    // The set owns its keys: they are released with CPLFree.
    CPLHashSet* hSetSchemas =
        CPLHashSetNew(CPLHashSetHashStr, CPLHashSetEqualStr, CPLFree);
    CPLXMLNode* psSchema =
        CPLLoadSchemaStrInternal(hSetSchemas, pszXSDFilename);

    char* pszStr = nullptr;
    if( psSchema )
    {
        CPLMoveImportAtBeginning(psSchema);
        pszStr = CPLSerializeXMLTree(psSchema);
        CPLDestroyXMLNode(psSchema);
    }
    CPLHashSetDestroy(hSetSchemas);
    return pszStr;
}

/************************************************************************/
/*                     CPLLibXMLInputStreamCPLFree()                    */
/************************************************************************/

// Deallocation callback with the signature libxml2 expects for an input
// stream's "free" member; releases the buffer with CPLFree.
static void CPLLibXMLInputStreamCPLFree( xmlChar* pszBuffer )
{
    CPLFree(pszBuffer);
}

/************************************************************************/
/*                           CPLFindLocalXSD()                          */
/************************************************************************/

// Look for pszXSDFilename under the directory named by the
// GDAL_OPENGIS_SCHEMAS configuration option or, when that option is not set,
// under the location returned by CPLFindFile("gdal", "SCHEMAS_OPENGIS_NET").
// Returns the candidate path if VSIStatExL() reports that it exists, and an
// empty string otherwise.
static CPLString CPLFindLocalXSD( const char* pszXSDFilename )
{
    CPLString osTmp;
    const char *pszSchemasOpenGIS =
        CPLGetConfigOption("GDAL_OPENGIS_SCHEMAS", nullptr);
    if( pszSchemasOpenGIS != nullptr )
    {
        int nLen = static_cast<int>(strlen(pszSchemasOpenGIS));
        // Avoid doubling the separator when the option ends with '/'.
        if( nLen > 0 && pszSchemasOpenGIS[nLen-1] == '/' )
        {
            osTmp = pszSchemasOpenGIS;
            osTmp += pszXSDFilename;
        }
        else
        {
            osTmp = pszSchemasOpenGIS;
            osTmp += "/";
            osTmp += pszXSDFilename;
        }
    }
    else if( (pszSchemasOpenGIS =
                  CPLFindFile( "gdal", "SCHEMAS_OPENGIS_NET" )) != nullptr )
    {
        osTmp = pszSchemasOpenGIS;
        osTmp += "/";
        osTmp += pszXSDFilename;
    }

    VSIStatBufL sStatBuf;
    if( VSIStatExL(osTmp, &sStatBuf, VSI_STAT_EXISTS_FLAG) == 0 )
        return osTmp;
    return "";
}

/************************************************************************/
/*                      CPLExternalEntityLoader()                       */
/************************************************************************/

// Built-in schema for the http://www.w3.org/XML/1998/namespace namespace
// (xml:lang, xml:space, xml:base, xml:id and the specialAttrs group).
constexpr char szXML_XSD[] =
    "<schema xmlns=\"http://www.w3.org/2001/XMLSchema\" "
    "targetNamespace=\"http://www.w3.org/XML/1998/namespace\">"
    "<attribute name=\"lang\">"
    "<simpleType>"
    "<union memberTypes=\"language\">"
    "<simpleType>"
    "<restriction base=\"string\">"
    "<enumeration value=\"\"/>"
    "</restriction>"
    "</simpleType>"
    "</union>"
    "</simpleType>"
    "</attribute>"
    "<attribute name=\"space\">"
    "<simpleType>"
    "<restriction base=\"NCName\">"
    "<enumeration value=\"default\"/>"
    "<enumeration value=\"preserve\"/>"
    "</restriction>"
    "</simpleType>"
    "</attribute>"
    "<attribute name=\"base\" type=\"anyURI\"/>"
    "<attribute name=\"id\" type=\"ID\"/>"
    "<attributeGroup name=\"specialAttrs\">"
    "<attribute ref=\"xml:base\"/>"
    "<attribute ref=\"xml:lang\"/>"
    "<attribute ref=\"xml:space\"/>"
    "<attribute ref=\"xml:id\"/>"
    "</attributeGroup>"
    "</schema>";

// Simplified (and truncated) version of http://www.w3.org/1999/xlink.xsd
// (sufficient for GML schemas).
constexpr char szXLINK_XSD[] = "<schema xmlns=\"http://www.w3.org/2001/XMLSchema\" " "targetNamespace=\"http://www.w3.org/1999/xlink\" " "xmlns:xlink=\"http://www.w3.org/1999/xlink\">" "<attribute name=\"type\" type=\"string\"/>" "<attribute name=\"href\" type=\"anyURI\"/>" "<attribute name=\"role\" type=\"anyURI\"/>" "<attribute name=\"arcrole\" type=\"anyURI\"/>" "<attribute name=\"title\" type=\"string\"/>" "<attribute name=\"show\" type=\"string\"/>" "<attribute name=\"actuate\" type=\"string\"/>" "<attribute name=\"label\" type=\"NCName\"/>" "<attribute name=\"from\" type=\"NCName\"/>" "<attribute name=\"to\" type=\"NCName\"/>" "<attributeGroup name=\"simpleAttrs\">" "<attribute ref=\"xlink:type\" fixed=\"simple\"/>" "<attribute ref=\"xlink:href\"/>" "<attribute ref=\"xlink:role\"/>" "<attribute ref=\"xlink:arcrole\"/>" "<attribute ref=\"xlink:title\"/>" "<attribute ref=\"xlink:show\"/>" "<attribute ref=\"xlink:actuate\"/>" "</attributeGroup>" "</schema>"; static xmlParserInputPtr CPLExternalEntityLoader( const char * URL, const char * ID, xmlParserCtxtPtr context ) { #if DEBUG_VERBOSE CPLDebug("CPL", "CPLExternalEntityLoader(%s)", URL); #endif // Use libxml2 catalog mechanism to resolve the URL to something else. // xmlChar* pszResolved = xmlCatalogResolveSystem((const xmlChar*)URL); xmlChar* pszResolved = xmlCatalogResolveSystem(reinterpret_cast<const xmlChar *>(URL)); if( pszResolved == nullptr ) pszResolved = xmlCatalogResolveURI(reinterpret_cast<const xmlChar *>(URL)); CPLString osURL; if( pszResolved ) { CPLDebug( "CPL", "Resolving %s in %s", URL, reinterpret_cast<const char *>(pszResolved) ); osURL = reinterpret_cast<const char *>(pszResolved); URL = osURL.c_str(); xmlFree(pszResolved); pszResolved = nullptr; } if( STARTS_WITH(URL, "http://") ) { // Make sure to use http://schemas.opengis.net/ // when gml/2 or gml/3 is detected. 
const char* pszGML = strstr(URL, "gml/2"); if( pszGML == nullptr ) pszGML = strstr(URL, "gml/3"); if( pszGML != nullptr ) { osURL = "http://schemas.opengis.net/"; osURL += pszGML; URL = osURL.c_str(); } else if( strcmp(URL, "http://www.w3.org/2001/xml.xsd") == 0 ) { CPLString osTmp = CPLFindLocalXSD("xml.xsd"); if( !osTmp.empty() ) { osURL = osTmp; URL = osURL.c_str(); } else { CPLDebug( "CPL", "Resolving %s to local definition", "http://www.w3.org/2001/xml.xsd" ); return xmlNewStringInputStream( context, reinterpret_cast<const xmlChar*>(szXML_XSD) ); } } else if( strcmp(URL, "http://www.w3.org/1999/xlink.xsd") == 0 ) { CPLString osTmp = CPLFindLocalXSD("xlink.xsd"); if( !osTmp.empty() ) { osURL = osTmp; URL = osURL.c_str(); } else { CPLDebug( "CPL", "Resolving %s to local definition", "http://www.w3.org/1999/xlink.xsd" ); return xmlNewStringInputStream( context, reinterpret_cast<const xmlChar *>( szXLINK_XSD) ); } } else if( !STARTS_WITH(URL, "http://schemas.opengis.net/") ) { CPLDebug("CPL", "Loading %s", URL); return pfnLibXMLOldExtranerEntityLoader(URL, ID, context); } } else if( STARTS_WITH(URL, "ftp://") ) { return pfnLibXMLOldExtranerEntityLoader(URL, ID, context); } else if( STARTS_WITH(URL, "file://") ) { // Parse file:// URI so as to be able to open them with VSI*L API. if( STARTS_WITH(URL, "file://localhost/") ) URL += 16; else URL += 7; if( URL[0] == '/' && URL[1] != '\0' && URL[2] == ':' && URL[3] == '/' ) { // Windows. ++URL; } else if( URL[0] == '/' ) { // Unix. 
} else { return pfnLibXMLOldExtranerEntityLoader(URL, ID, context); } } CPLString osModURL; if( STARTS_WITH(URL, "/vsizip/vsicurl/http%3A//") ) { osModURL = "/vsizip/vsicurl/http://"; osModURL += URL + strlen("/vsizip/vsicurl/http%3A//"); } else if( STARTS_WITH(URL, "/vsicurl/http%3A//") ) { osModURL = "vsicurl/http://"; osModURL += URL + strlen("/vsicurl/http%3A//"); } else if( STARTS_WITH(URL, "http://schemas.opengis.net/") ) { const char *pszAfterOpenGIS = URL + strlen("http://schemas.opengis.net/"); const char *pszSchemasOpenGIS = CPLGetConfigOption("GDAL_OPENGIS_SCHEMAS", nullptr); if( pszSchemasOpenGIS != nullptr ) { const int nLen = static_cast<int>(strlen(pszSchemasOpenGIS)); if( nLen > 0 && pszSchemasOpenGIS[nLen-1] == '/' ) { osModURL = pszSchemasOpenGIS; osModURL += pszAfterOpenGIS; } else { osModURL = pszSchemasOpenGIS; osModURL += "/"; osModURL += pszAfterOpenGIS; } } else if( (pszSchemasOpenGIS = CPLFindFile( "gdal", "SCHEMAS_OPENGIS_NET" )) != nullptr ) { osModURL = pszSchemasOpenGIS; osModURL += "/"; osModURL += pszAfterOpenGIS; } else if( (pszSchemasOpenGIS = CPLFindFile( "gdal", "SCHEMAS_OPENGIS_NET.zip" )) != nullptr ) { osModURL = "/vsizip/"; osModURL += pszSchemasOpenGIS; osModURL += "/"; osModURL += pszAfterOpenGIS; } else { osModURL = "/vsizip/vsicurl/" "http://schemas.opengis.net/SCHEMAS_OPENGIS_NET.zip/"; osModURL += pszAfterOpenGIS; } } else { osModURL = URL; } xmlChar* pszBuffer = reinterpret_cast<xmlChar *>(CPLLoadSchemaStr(osModURL)); if( pszBuffer == nullptr ) return nullptr; xmlParserInputPtr poInputStream = xmlNewStringInputStream(context, pszBuffer); if( poInputStream != nullptr ) poInputStream->free = CPLLibXMLInputStreamCPLFree; return poInputStream; } /************************************************************************/ /* CPLLibXMLWarningErrorCallback() */ /************************************************************************/ static void CPLLibXMLWarningErrorCallback ( void * ctx, const char * msg, ... 
) { va_list varg; va_start(varg, msg); char *pszStr = reinterpret_cast<char *>(va_arg( varg, char *)); if( strstr(pszStr, "since this namespace was already imported") == nullptr ) { xmlErrorPtr pErrorPtr = xmlGetLastError(); const char* pszFilename = static_cast<char *>(ctx); char* pszStrDup = CPLStrdup(pszStr); int nLen = static_cast<int>(strlen(pszStrDup)); if( nLen > 0 && pszStrDup[nLen-1] == '\n' ) pszStrDup[nLen-1] = '\0'; if( pszFilename != nullptr && pszFilename[0] != '<' ) { CPLError(CE_Failure, CPLE_AppDefined, "libXML: %s:%d: %s", pszFilename, pErrorPtr ? pErrorPtr->line : 0, pszStrDup); } else { CPLError(CE_Failure, CPLE_AppDefined, "libXML: %d: %s", pErrorPtr ? pErrorPtr->line : 0, pszStrDup); } CPLFree(pszStrDup); } va_end(varg); } /************************************************************************/ /* CPLLoadContentFromFile() */ /************************************************************************/ static char* CPLLoadContentFromFile( const char* pszFilename ) { VSILFILE* fp = VSIFOpenL(pszFilename, "rb"); if( fp == nullptr ) return nullptr; if( VSIFSeekL(fp, 0, SEEK_END) != 0 ) { CPL_IGNORE_RET_VAL(VSIFCloseL(fp)); return nullptr; } vsi_l_offset nSize = VSIFTellL(fp); if( VSIFSeekL(fp, 0, SEEK_SET) != 0 ) { CPL_IGNORE_RET_VAL(VSIFCloseL(fp)); return nullptr; } if( static_cast<vsi_l_offset>(static_cast<int>(nSize)) != nSize || nSize > INT_MAX - 1 ) { CPL_IGNORE_RET_VAL(VSIFCloseL(fp)); return nullptr; } char* pszBuffer = static_cast<char *>(VSIMalloc(static_cast<size_t>(nSize) + 1)); if( pszBuffer == nullptr ) { CPL_IGNORE_RET_VAL(VSIFCloseL(fp)); return nullptr; } if( static_cast<size_t>(VSIFReadL(pszBuffer, 1, static_cast<size_t>(nSize), fp)) != static_cast<size_t>(nSize) ) { VSIFree(pszBuffer); CPL_IGNORE_RET_VAL(VSIFCloseL(fp)); return nullptr; } pszBuffer[nSize] = '\0'; CPL_IGNORE_RET_VAL(VSIFCloseL(fp)); return pszBuffer; } /************************************************************************/ /* CPLLoadXMLSchema() */ 
/************************************************************************/ typedef void* CPLXMLSchemaPtr; /** * \brief Load a XSD schema. * * The return value should be freed with CPLFreeXMLSchema(). * * @param pszXSDFilename XSD schema to load. * @return a handle to the parsed XML schema, or NULL in case of failure. * * @since GDAL 1.10.0 */ static CPLXMLSchemaPtr CPLLoadXMLSchema( const char* pszXSDFilename ) { char* pszStr = CPLLoadSchemaStr(pszXSDFilename); if( pszStr == nullptr ) return nullptr; xmlExternalEntityLoader pfnLibXMLOldExtranerEntityLoaderLocal = nullptr; pfnLibXMLOldExtranerEntityLoaderLocal = xmlGetExternalEntityLoader(); pfnLibXMLOldExtranerEntityLoader = pfnLibXMLOldExtranerEntityLoaderLocal; xmlSetExternalEntityLoader(CPLExternalEntityLoader); xmlSchemaParserCtxtPtr pSchemaParserCtxt = xmlSchemaNewMemParserCtxt(pszStr, static_cast<int>(strlen(pszStr))); xmlSchemaSetParserErrors(pSchemaParserCtxt, CPLLibXMLWarningErrorCallback, CPLLibXMLWarningErrorCallback, nullptr); xmlSchemaPtr pSchema = xmlSchemaParse(pSchemaParserCtxt); xmlSchemaFreeParserCtxt(pSchemaParserCtxt); xmlSetExternalEntityLoader(pfnLibXMLOldExtranerEntityLoaderLocal); CPLFree(pszStr); return static_cast<CPLXMLSchemaPtr>( pSchema ); } /************************************************************************/ /* CPLFreeXMLSchema() */ /************************************************************************/ /** * \brief Free a XSD schema. * * @param pSchema a handle to the parsed XML schema. * * @since GDAL 1.10.0 */ static void CPLFreeXMLSchema( CPLXMLSchemaPtr pSchema ) { if( pSchema ) xmlSchemaFree(static_cast<xmlSchemaPtr>(pSchema)); } /************************************************************************/ /* CPLValidateXML() */ /************************************************************************/ /** * \brief Validate a XML file against a XML schema. * * @param pszXMLFilename the filename of the XML file to validate. 
* @param pszXSDFilename the filename of the XSD schema. * @param papszOptions unused for now. Set to NULL. * @return TRUE if the XML file validates against the XML schema. * * @since GDAL 1.10.0 */ int CPLValidateXML( const char* pszXMLFilename, const char* pszXSDFilename, CPL_UNUSED CSLConstList papszOptions ) { char szHeader[2048] = {}; // TODO(schwehr): Get this off of the stack. CPLString osTmpXSDFilename; if( pszXMLFilename[0] == '<' ) { strncpy(szHeader, pszXMLFilename, sizeof(szHeader)); szHeader[sizeof(szHeader)-1] = '\0'; } else { VSILFILE* fpXML = VSIFOpenL(pszXMLFilename, "rb"); if( fpXML == nullptr ) { CPLError( CE_Failure, CPLE_OpenFailed, "Cannot open %s", pszXMLFilename ); return FALSE; } const vsi_l_offset nRead = VSIFReadL(szHeader, 1, sizeof(szHeader) - 1, fpXML); szHeader[nRead] = '\0'; CPL_IGNORE_RET_VAL(VSIFCloseL(fpXML)); } // Workaround following bug: // // "element FeatureCollection: Schemas validity error : Element // '{http://www.opengis.net/wfs}FeatureCollection': No matching global // declaration available for the validation root" // // We create a wrapping XSD that imports the WFS .xsd (and possibly the GML // .xsd too) and the application schema. This is a known libxml2 // limitation. 
if( strstr(szHeader, "<wfs:FeatureCollection") || (strstr(szHeader, "<FeatureCollection") && strstr(szHeader, "xmlns:wfs=\"http://www.opengis.net/wfs\"")) ) { const char* pszWFSSchemaNamespace = "http://www.opengis.net/wfs"; const char* pszWFSSchemaLocation = nullptr; const char* pszGMLSchemaLocation = nullptr; if( strstr(szHeader, "wfs/1.0.0/WFS-basic.xsd") ) { pszWFSSchemaLocation = "http://schemas.opengis.net/wfs/1.0.0/WFS-basic.xsd"; } else if( strstr(szHeader, "wfs/1.1.0/wfs.xsd") ) { pszWFSSchemaLocation = "http://schemas.opengis.net/wfs/1.1.0/wfs.xsd"; } else if( strstr(szHeader, "wfs/2.0/wfs.xsd") ) { pszWFSSchemaNamespace = "http://www.opengis.net/wfs/2.0"; pszWFSSchemaLocation = "http://schemas.opengis.net/wfs/2.0/wfs.xsd"; } VSILFILE* fpXSD = VSIFOpenL(pszXSDFilename, "rb"); if( fpXSD == nullptr ) { CPLError(CE_Failure, CPLE_OpenFailed, "Cannot open %s", pszXSDFilename); return FALSE; } const vsi_l_offset nRead = VSIFReadL(szHeader, 1, sizeof(szHeader) - 1, fpXSD); szHeader[nRead] = '\0'; CPL_IGNORE_RET_VAL(VSIFCloseL(fpXSD)); if( strstr(szHeader, "gml/3.1.1") != nullptr && strstr(szHeader, "gml/3.1.1/base/gml.xsd") == nullptr ) { pszGMLSchemaLocation = "http://schemas.opengis.net/gml/3.1.1/base/gml.xsd"; } if( pszWFSSchemaLocation != nullptr ) { osTmpXSDFilename = CPLSPrintf( "/vsimem/CPLValidateXML_%p_%p.xsd", pszXMLFilename, pszXSDFilename ); char * const pszEscapedXSDFilename = CPLEscapeString(pszXSDFilename, -1, CPLES_XML); VSILFILE * const fpMEM = VSIFOpenL(osTmpXSDFilename, "wb"); CPL_IGNORE_RET_VAL(VSIFPrintfL( fpMEM, "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\">\n")); CPL_IGNORE_RET_VAL(VSIFPrintfL( fpMEM, " <xs:import namespace=\"%s\" schemaLocation=\"%s\"/>\n", pszWFSSchemaNamespace, pszWFSSchemaLocation)); CPL_IGNORE_RET_VAL(VSIFPrintfL( fpMEM, " <xs:import namespace=\"ignored\" schemaLocation=\"%s\"/>\n", pszEscapedXSDFilename)); if( pszGMLSchemaLocation ) CPL_IGNORE_RET_VAL(VSIFPrintfL( fpMEM, " <xs:import 
namespace=\"http://www.opengis.net/gml\" " "schemaLocation=\"%s\"/>\n", pszGMLSchemaLocation)); CPL_IGNORE_RET_VAL(VSIFPrintfL(fpMEM, "</xs:schema>\n")); CPL_IGNORE_RET_VAL(VSIFCloseL(fpMEM)); CPLFree(pszEscapedXSDFilename); } } CPLXMLSchemaPtr pSchema = CPLLoadXMLSchema(!osTmpXSDFilename.empty() ? osTmpXSDFilename.c_str() : pszXSDFilename); if( !osTmpXSDFilename.empty() ) VSIUnlink(osTmpXSDFilename); if( pSchema == nullptr ) return FALSE; xmlSchemaValidCtxtPtr pSchemaValidCtxt = xmlSchemaNewValidCtxt(static_cast<xmlSchemaPtr>(pSchema)); if( pSchemaValidCtxt == nullptr ) { CPLFreeXMLSchema(pSchema); return FALSE; } xmlSchemaSetValidErrors(pSchemaValidCtxt, CPLLibXMLWarningErrorCallback, CPLLibXMLWarningErrorCallback, const_cast<char *>(pszXMLFilename) ); bool bValid = false; if( pszXMLFilename[0] == '<' ) { xmlDocPtr pDoc = xmlParseDoc(reinterpret_cast<const xmlChar *>(pszXMLFilename)); if( pDoc != nullptr ) { bValid = xmlSchemaValidateDoc(pSchemaValidCtxt, pDoc) == 0; } xmlFreeDoc(pDoc); } else if( !STARTS_WITH(pszXMLFilename, "/vsi") ) { bValid = xmlSchemaValidateFile(pSchemaValidCtxt, pszXMLFilename, 0) == 0; } else { char* pszXML = CPLLoadContentFromFile(pszXMLFilename); if( pszXML != nullptr ) { xmlDocPtr pDoc = xmlParseDoc(reinterpret_cast<const xmlChar *>(pszXML)); if( pDoc != nullptr ) { bValid = xmlSchemaValidateDoc(pSchemaValidCtxt, pDoc) == 0; } xmlFreeDoc(pDoc); } CPLFree(pszXML); } xmlSchemaFreeValidCtxt(pSchemaValidCtxt); CPLFreeXMLSchema(pSchema); return bValid; } #else // HAVE_RECENT_LIBXML2 /************************************************************************/ /* CPLValidateXML() */ /************************************************************************/ int CPLValidateXML( const char* /* pszXMLFilename */, const char* /* pszXSDFilename */, CSLConstList /* papszOptions */ ) { CPLError( CE_Failure, CPLE_NotSupported, "%s not implemented due to missing libxml2 support", "CPLValidateXML()" ); return FALSE; } #endif // HAVE_RECENT_LIBXML2