diff --git a/resources/context_help/QgsDelimitedTextSourceSelect-en_US b/resources/context_help/QgsDelimitedTextSourceSelect-en_US index c3e6e0ff8f57..48e65cbd98f4 100644 --- a/resources/context_help/QgsDelimitedTextSourceSelect-en_US +++ b/resources/context_help/QgsDelimitedTextSourceSelect-en_US @@ -182,7 +182,7 @@ or are duplicated.

In addition to the attributes explicitly in the data file QGIS assigns a unique -feature id to each record. This is the line number in the source file on which +feature id to each record, which is the line number in the source file on which the record starts.

@@ -275,7 +275,10 @@ The following options can be added

  • crs=... specifies the coordinate system to use for the vector layer, in a format accepted by QgsCoordinateReferenceSystem.createFromString (for example "EPSG:4167"). If this is not specified then a dialog box may request this information from the user when the layer is loaded (depending on QGIS CRS settings).
  • -
  • quiet=(yes|no) specifies whether errors encountered loading the layer are presented in a dialog box (they will be written to the QGIS log in any case). The default is no.
  • +
  • subsetIndex=(yes|no) specifies whether the provider should build an index to define the subset during the initial file scan. The index will apply both for explicitly defined subsets, and for the implicit subset of features for which the geometry definition is valid. By default the subset index is built if it is applicable. This option is not available from the GUI.
  • +
  • spatialIndex=(yes|no) specifies whether the provider should build a spatial index during the initial file scan. By default the spatial index is not built.
  • +
  • useWatcher=(yes|no) specifies whether the provider should use a file system watcher to monitor for changes to the file. This option is not available from the GUI.
  • +
  • quiet=(yes|no) specifies whether errors encountered loading the layer are presented in a dialog box (they will be written to the QGIS log in any case). The default is no. This option is not available from the GUI.
  • diff --git a/src/core/qgsvectorlayer.h b/src/core/qgsvectorlayer.h index 497beadbcee6..e29991d84195 100644 --- a/src/core/qgsvectorlayer.h +++ b/src/core/qgsvectorlayer.h @@ -375,6 +375,20 @@ struct CORE_EXPORT QgsVectorJoinInfo * Defines the coordinate reference system used for the layer. This can be * any string accepted by QgsCoordinateReferenceSystem::createFromString() * + * -subsetIndex=(yes|no) + * + * Determines whether the provider generates an index to improve the efficiency + * of subsets. The default is yes + * + * -spatialIndex=(yes|no) + * + * Determines whether the provider generates a spatial index. The default is no. + * + * -useWatcher=(yes|no) + * + * Defines whether the file will be monitored for changes. The default is + * to monitor for changes. + * * - quiet * * Errors encountered loading the file will not be reported in a user dialog if diff --git a/src/providers/delimitedtext/CMakeLists.txt b/src/providers/delimitedtext/CMakeLists.txt index 9f53432e393a..22da50ae6ecd 100644 --- a/src/providers/delimitedtext/CMakeLists.txt +++ b/src/providers/delimitedtext/CMakeLists.txt @@ -10,6 +10,7 @@ SET (DTEXT_SRCS ) SET (DTEXT_MOC_HDRS + qgsdelimitedtextfile.h qgsdelimitedtextprovider.h qgsdelimitedtextsourceselect.h ) diff --git a/src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.cpp b/src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.cpp index 7d660ed9d39e..fd3371dcc023 100644 --- a/src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.cpp +++ b/src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.cpp @@ -16,9 +16,13 @@ #include "qgsdelimitedtextprovider.h" #include "qgsdelimitedtextfile.h" +#include "qgsexpression.h" #include "qgsgeometry.h" +#include "qgslogger.h" #include "qgsmessagelog.h" +#include "qgsspatialindex.h" +#include #include QgsDelimitedTextFeatureIterator::QgsDelimitedTextFeatureIterator( QgsDelimitedTextProvider* p, const QgsFeatureRequest& request ) @@ -32,6 +36,106 @@ 
QgsDelimitedTextFeatureIterator::QgsDelimitedTextFeatureIterator( QgsDelimitedTe } P->mActiveIterator = this; + // Determine mode to use based on request... + + QgsDebugMsg( "Setting up QgsDelimitedTextIterator" ); + + // Does the layer have geometry - will revise later to determine if we actually need to + // load it. + mLoadGeometry = P->mGeomRep != QgsDelimitedTextProvider::GeomNone; + + // Does the layer have an explicit or implicit subset (implicit subset is if we have geometry which can + // be invalid) + + mTestSubset = P->mSubsetExpression; + mTestGeometry = false; + + mMode = FileScan; + if ( request.filterType() == QgsFeatureRequest::FilterFid ) + { + QgsDebugMsg( "Configuring for returning single id" ); + mFeatureIds.append( request.filterFid() ); + mMode = FeatureIds; + mTestSubset = false; + } + // If have geometry and testing geometry then evaluate options... + // If we don't have geometry then all records pass geometry filter. + // CC: 2013-05-09 + // Not sure about intended relationship between filtering on geometry and + // requesting no geometry? Have preserved current logic of ignoring spatial filter + // if not requesting geometry. + + else if ( request.filterType() == QgsFeatureRequest::FilterRect && mLoadGeometry + && !( mRequest.flags() & QgsFeatureRequest::NoGeometry ) ) + { + QgsDebugMsg( "Configuring for rectangle select" ); + mTestGeometry = true; + // Exact intersection test only applies for WKT geometries + mTestGeometryExact = mRequest.flags() & QgsFeatureRequest::ExactIntersect + && P->mGeomRep == QgsDelimitedTextProvider::GeomAsWkt; + + QgsRectangle rect = request.filterRect(); + + // If request doesn't overlap extents, then nothing to return + if ( ! 
rect.intersects( P->extent() ) ) + { + QgsDebugMsg( "Rectangle outside layer extents - no features to return" ); + mMode = FeatureIds; + } + // If the request extents include the entire layer, then revert to + // a file scan + + else if ( rect.contains( P->extent() ) ) + { + QgsDebugMsg( "Rectangle contains layer extents - bypass spatial filter" ); + mTestGeometry = false; + } + // If we have a spatial index then use it. The spatial index already accounts + // for the subset. Also means we don't have to test geometries unless doing exact + // intersection + + else if ( P->mUseSpatialIndex ) + { + mFeatureIds = P->mSpatialIndex->intersects( rect ); + // Sort for efficient sequential retrieval + qSort(mFeatureIds.begin(), mFeatureIds.end()); + QgsDebugMsg( QString("Layer has spatial index - selected %1 features from index").arg(mFeatureIds.size()) ); + mMode = FeatureIds; + mTestSubset = false; + mTestGeometry = mTestGeometryExact; + } + } + + // If we have a subset index then use it.. + if ( mMode == FileScan && P->mUseSubsetIndex ) + { + QgsDebugMsg( QString("Layer has subset index - use %1 items from subset index").arg(P->mSubsetIndex.size()) ); + mTestSubset = false; + mMode = SubsetIndex; + } + + // Otherwise just have to scan the file + if( mMode == FileScan ) + { + QgsDebugMsg( "File will be scanned for desired features" ); + } + + // If the request does not require geometry, can we avoid loading it? + // We need it if we are testing geometry (ie spatial filter), or + // if testing the subset expression, and it uses geometry. + if ( mRequest.flags() & QgsFeatureRequest::NoGeometry && + ! mTestGeometry && + ! ( mTestSubset && P->mSubsetExpression->needsGeometry() ) ) + { + QgsDebugMsg( "Feature geometries not required" ); + mLoadGeometry = false; + } + + QgsDebugMsg( QString("Iterator is scanning file: ") + (scanningFile() ? "Yes" : "No")); + QgsDebugMsg( QString("Iterator is loading geometries: ") + (loadGeometry() ? 
"Yes" : "No")); + QgsDebugMsg( QString("Iterator is testing geometries: ") + (testGeometry() ? "Yes" : "No")); + QgsDebugMsg( QString("Iterator is testing subset: ") + (testSubset() ? "Yes" : "No")); + rewind(); } @@ -49,8 +153,40 @@ bool QgsDelimitedTextFeatureIterator::nextFeature( QgsFeature& feature ) if ( mClosed ) return false; - bool gotFeature = P->nextFeature( feature, P->mFile, mRequest ); + bool gotFeature = false; + if ( mMode == FileScan ) + { + gotFeature = P->nextFeature( feature, P->mFile, this ); + } + else + { + while( ! gotFeature ) + { + qint64 fid = -1; + if ( mMode == FeatureIds ) + { + if( mNextId < mFeatureIds.size() ) + { + fid = mFeatureIds[mNextId]; + } + } + else if( mNextId < P->mSubsetIndex.size() ) + { + fid = P->mSubsetIndex[mNextId]; + } + if( fid < 0 ) break; + mNextId++; + gotFeature = (P->setNextFeatureId( fid ) && P->nextFeature( feature, P->mFile, this )); + } + } + + // CC: 2013-05-08: What is the intent of rewind/close. The following + // line from previous implementation means that we cannot rewind the iterator + // after reading last record? Is this correct? This line can be removed if + // not. + if ( ! gotFeature ) close(); + return gotFeature; } @@ -60,7 +196,14 @@ bool QgsDelimitedTextFeatureIterator::rewind() return false; // Skip to first data record - P->resetStream(); + if ( mMode == FileScan ) + { + P->resetStream(); + } + else + { + mNextId = 0; + } return true; } @@ -71,6 +214,29 @@ bool QgsDelimitedTextFeatureIterator::close() // tell provider that this iterator is not active anymore P->mActiveIterator = 0; + mFeatureIds = QList(); mClosed = true; return true; } + +/** + * Check to see if the point is within the selection rectangle + */ +bool QgsDelimitedTextFeatureIterator::wantGeometry( const QgsPoint &pt ) const +{ + if ( ! 
mTestGeometry ) return true; + return mRequest.filterRect().contains( pt ); +} + +/** + * Check to see if the geometry is within the selection rectangle + */ +bool QgsDelimitedTextFeatureIterator::wantGeometry( QgsGeometry *geom ) const +{ + if ( ! mTestGeometry ) return true; + + if ( mTestGeometryExact ) + return geom->intersects( mRequest.filterRect() ); + else + return geom->boundingBox().intersects( mRequest.filterRect() ); +} diff --git a/src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.h b/src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.h index 1b1e768a6199..d64ba0dee40c 100644 --- a/src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.h +++ b/src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.h @@ -15,12 +15,20 @@ #ifndef QGSDELIMITEDTEXTFEATUREITERATOR_H #define QGSDELIMITEDTEXTFEATUREITERATOR_H +#include #include "qgsfeatureiterator.h" +#include "qgsfeature.h" class QgsDelimitedTextProvider; class QgsDelimitedTextFeatureIterator : public QgsAbstractFeatureIterator { + enum IteratorMode + { + FileScan, + SubsetIndex, + FeatureIds + }; public: QgsDelimitedTextFeatureIterator( QgsDelimitedTextProvider* p, const QgsFeatureRequest& request ); @@ -35,8 +43,29 @@ class QgsDelimitedTextFeatureIterator : public QgsAbstractFeatureIterator //! end of iterating: free the resources / lock virtual bool close(); + // Flags used by nextFeature function of QgsDelimitedTextProvider + bool testSubset() const { return mTestSubset; } + bool testGeometry() const { return mTestGeometry; } + bool loadGeometry() const { return mLoadGeometry; } + bool loadSubsetOfAttributes() const { return ! mTestSubset && mRequest.flags() & QgsFeatureRequest::SubsetOfAttributes;} + bool scanningFile() const { return mMode == FileScan; } + + // Pass through attribute subset + const QgsAttributeList &subsetOfAttributes() const { return mRequest.subsetOfAttributes(); } + + // Tests whether the geometry is required, given that testGeometry is true. 
+ bool wantGeometry( const QgsPoint & point ) const; + bool wantGeometry( QgsGeometry *geom ) const; + protected: QgsDelimitedTextProvider* P; + QList mFeatureIds; + IteratorMode mMode; + long mNextId; + bool mTestSubset; + bool mTestGeometry; + bool mTestGeometryExact; + bool mLoadGeometry; }; diff --git a/src/providers/delimitedtext/qgsdelimitedtextfile.cpp b/src/providers/delimitedtext/qgsdelimitedtextfile.cpp index 47458c17d4df..1083d12110e0 100644 --- a/src/providers/delimitedtext/qgsdelimitedtextfile.cpp +++ b/src/providers/delimitedtext/qgsdelimitedtextfile.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,8 @@ QgsDelimitedTextFile::QgsDelimitedTextFile( QString url ) : mEncoding( "UTF-8" ), mFile( 0 ), mStream( 0 ), + mUseWatcher( true ), + mWatcher( 0 ), mDefinitionValid( false ), mUseHeader( true ), mDiscardEmptyFields( false ), @@ -44,8 +47,11 @@ QgsDelimitedTextFile::QgsDelimitedTextFile( QString url ) : mSkipLines( 0 ), mMaxFields( 0 ), mMaxNameLength( 200 ), // Don't want field names to be too unweildy! 
- mLineNumber( 0 ), - mRecordLineNumber( 0 ), + mLineNumber( -1 ), + mRecordLineNumber( -1 ), + mRecordNumber( -1 ), + mHoldCurrentRecord( false ), + mMaxRecordNumber( -1 ), mMaxFieldCount( 0 ) { // The default type is CSV @@ -71,6 +77,11 @@ void QgsDelimitedTextFile::close() delete mFile; mFile = 0; } + if ( mWatcher ) + { + delete mWatcher; + mWatcher = 0; + } } bool QgsDelimitedTextFile::open() @@ -92,10 +103,25 @@ bool QgsDelimitedTextFile::open() QTextCodec *codec = QTextCodec::codecForName( mEncoding.toAscii() ); mStream->setCodec( codec ); } + mMaxRecordNumber = -1; + mHoldCurrentRecord = false; + if ( mWatcher ) delete mWatcher; + if( mUseWatcher ) + { + mWatcher = new QFileSystemWatcher( this ); + mWatcher->addPath( mFileName ); + connect( mWatcher, SIGNAL( fileChanged( QString ) ), this, SLOT( updateFile() ) ); + } } return true; } +void QgsDelimitedTextFile::updateFile() +{ + close(); + emit( fileUpdated() ); +} + // Clear information based on current definition of file void QgsDelimitedTextFile::resetDefinition() { @@ -126,6 +152,12 @@ bool QgsDelimitedTextFile::setFromUrl( QUrl &url ) mEncoding = url.queryItemValue( "encoding" ); } + // + if ( url.hasQueryItem( "useWatcher" ) ) + { + mUseWatcher = ! url.queryItemValue( "useWatcher" ).toUpper().startsWith( 'N' );; + } + // The default type is csv, to be consistent with the // previous implementation (except that quoting should be handled properly) @@ -181,7 +213,7 @@ bool QgsDelimitedTextFile::setFromUrl( QUrl &url ) } if ( url.hasQueryItem( "skipEmptyFields" ) ) { - mDiscardEmptyFields = ! url.queryItemValue( "skipEmptyFields" ).toUpper().startsWith( 'N' );; + mDiscardEmptyFields = ! url.queryItemValue( "skipEmptyFields" ).toUpper().startsWith( 'N' ); } if ( url.hasQueryItem( "trimFields" ) ) { @@ -231,6 +263,9 @@ QUrl QgsDelimitedTextFile::url() { url.addQueryItem( "encoding", mEncoding ); } + + if( ! 
mUseWatcher ) url.addQueryItem( "useWatcher", "no"); + url.addQueryItem( "type", type() ); if ( mType == DelimTypeRegexp ) { @@ -277,6 +312,12 @@ void QgsDelimitedTextFile::setEncoding( QString encoding ) mEncoding = encoding; } +void QgsDelimitedTextFile::setUseWatcher(bool useWatcher) +{ + resetDefinition(); + mUseWatcher = useWatcher; +} + QString QgsDelimitedTextFile::type() { if ( mType == DelimTypeWhitespace ) return QString( "whitespace" ); @@ -372,7 +413,7 @@ void QgsDelimitedTextFile::setDiscardEmptyFields( bool discardEmptyFields ) void QgsDelimitedTextFile::setFieldNames( const QStringList &names ) { - mFieldNames.empty(); + mFieldNames.clear(); foreach ( QString name, names ) { bool nameOk = true; @@ -455,9 +496,47 @@ int QgsDelimitedTextFile::fieldIndex( QString name ) } +bool QgsDelimitedTextFile::setNextRecordId(long nextRecordId ) +{ + mHoldCurrentRecord = nextRecordId == mRecordLineNumber; + if( mHoldCurrentRecord ) return true; + return setNextLineNumber( nextRecordId ); +} + QgsDelimitedTextFile::Status QgsDelimitedTextFile::nextRecord( QStringList &record ) { - return ( this->*mParser )( record ); + + record.clear(); + Status status = RecordOk; + + if( mHoldCurrentRecord ) + { + mHoldCurrentRecord = false; + } + else + { + // Invalidate the record line number, in get EOF + mRecordLineNumber = -1; + + // Find the first non-blank line to read + QString buffer; + status = nextLine( buffer, true ); + if ( status != RecordOk ) return status; + + mCurrentRecord.clear(); + mRecordLineNumber = mLineNumber; + if ( mRecordNumber >= 0 ) + { + mRecordNumber++; + if ( mRecordNumber > mMaxRecordNumber ) mMaxRecordNumber = mRecordNumber; + } + status = (this->*mParser )( buffer, mCurrentRecord ); + } + if( status == RecordOk ) + { + record.append(mCurrentRecord); + } + return status; } @@ -469,7 +548,8 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::reset() // Reset the file pointer mStream->seek( 0 ); mLineNumber = 0; - mRecordLineNumber = 0; + 
mRecordNumber = -1; + mRecordLineNumber = -1; // Skip header lines for ( int i = mSkipLines; i-- > 0; ) @@ -478,14 +558,15 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::reset() mLineNumber++; } // Read the column names + Status result = RecordOk; if ( mUseHeader ) { QStringList names; - QgsDelimitedTextFile::Status result = nextRecord( names ); + result = nextRecord( names ); setFieldNames( names ); - return result; } - return RecordOk; + if( result == RecordOk ) mRecordNumber = 0; + return result; } QgsDelimitedTextFile::Status QgsDelimitedTextFile::nextLine( QString &buffer, bool skipBlank ) @@ -509,6 +590,24 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::nextLine( QString &buffer, bo return RecordEOF; } +bool QgsDelimitedTextFile::setNextLineNumber( long nextLineNumber ) +{ + if ( ! mStream ) return false; + if ( mLineNumber > nextLineNumber-1 ) + { + mRecordNumber = -1; + mStream->seek(0); + mLineNumber = 0; + } + QString buffer; + while( mLineNumber < nextLineNumber-1 ) + { + if( nextLine(buffer,false) != RecordOk ) return false; + } + return true; + +} + void QgsDelimitedTextFile::appendField( QStringList &record, QString field, bool quoted ) { if ( mMaxFields > 0 && record.size() >= mMaxFields ) return; @@ -522,16 +621,14 @@ void QgsDelimitedTextFile::appendField( QStringList &record, QString field, bool if ( !( mDiscardEmptyFields && field.isEmpty() ) ) record.append( field ); } // Keep track of maximum number of non-empty fields in a record - if ( record.size() > mMaxFieldCount && ! field.isEmpty() ) mMaxFieldCount = record.size(); + if ( record.size() > mMaxFieldCount && ! 
field.isEmpty() ) + { + mMaxFieldCount = record.size(); + } } -QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseRegexp( QStringList &fields ) +QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseRegexp( QString &buffer, QStringList &fields ) { - fields.clear(); - QString buffer; - Status status = nextLine( buffer, true ); - if ( status != RecordOk ) return status; - mRecordLineNumber = mLineNumber; // If match is anchored, then only interested in records which actually match // and extract capture groups @@ -586,16 +683,9 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseRegexp( QStringList &fie return RecordOk; } -QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseQuoted( QStringList &fields ) +QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseQuoted( QString &buffer, QStringList &fields ) { - fields.clear(); - - // Find the first non-blank line to read - QString buffer; - Status status = nextLine( buffer, true ); - if ( status != RecordOk ) return status; - mRecordLineNumber = mLineNumber; - + Status status = RecordOk; QString field; // String in which to accumulate next field bool escaped = false; // Next char is escaped bool quoted = false; // In quotes diff --git a/src/providers/delimitedtext/qgsdelimitedtextfile.h b/src/providers/delimitedtext/qgsdelimitedtextfile.h index 0e3266aa4e11..6c10054d400d 100644 --- a/src/providers/delimitedtext/qgsdelimitedtextfile.h +++ b/src/providers/delimitedtext/qgsdelimitedtextfile.h @@ -1,5 +1,5 @@ /*************************************************************************** - qgsdelimitedtextparser.h - File for delimited text file + qgsdelimitedtextfile.h - File for delimited text file ------------------- begin : 2004-02-27 copyright : (C) 2013 by Chris Crook @@ -15,6 +15,9 @@ * * ***************************************************************************/ +#ifndef QGSDELIMITEDTEXTFILE_H +#define QGSDELIMITEDTEXTFILE_H + #include #include #include @@ -22,6 +25,7 @@ class QgsFeature; class 
QgsField; class QFile; +class QFileSystemWatcher; class QTextStream; @@ -66,9 +70,11 @@ class QTextStream; // on an abstract base class in order to facilitate changing the type of the parser easily // eg in the provider dialog -class QgsDelimitedTextFile +class QgsDelimitedTextFile : public QObject { + Q_OBJECT + public: enum Status @@ -84,7 +90,7 @@ class QgsDelimitedTextFile { DelimTypeWhitespace, DelimTypeCSV, - DelimTypeRegexp, + DelimTypeRegexp }; QgsDelimitedTextFile( QString url = QString() ); @@ -238,11 +244,22 @@ class QgsDelimitedTextFile /** Return the line number of the start of the last record read * @return linenumber The line number of the start of the record */ - int recordLineNumber() + int recordId() { return mRecordLineNumber; } + /** Set the index of the next record to return. + * @param nextRecordId The id to set the next record to + * @return valid True if the next record can be located + */ + bool setNextRecordId( long nextRecordId ); + + /** Number record number of records visited. After scanning the file + * serves as a record count. 
+ * @return maxRecordNumber The maximum record number + */ + long recordCount() { return mMaxRecordNumber; } /** Reset the file to reread from the beginning */ Status reset(); @@ -272,6 +289,22 @@ class QgsDelimitedTextFile */ static QString decodeChars( QString string ); + /** Set to use or not use a QFileWatcher to notify of changes to the file + * @param useWatcher True to use a watcher, false otherwise + */ + + void setUseWatcher( bool useWatcher ); + + signals: + /** Signal sent when the file is updated by another process + */ + void fileUpdated(); + + public slots: + /** Slot used by watcher to notify of file updates + */ + void updateFile(); + private: /** Open the file @@ -290,9 +323,9 @@ class QgsDelimitedTextFile void resetDefinition(); /** Parse reqular expression delimited fields */ - Status parseRegexp( QStringList &fields ); + Status parseRegexp( QString &buffer, QStringList &fields ); /** Parse quote delimited fields, where quote and escape are different */ - Status parseQuoted( QStringList &fields ); + Status parseQuoted( QString &buffer, QStringList &fields ); /** Return the next line from the data file. If skipBlank is true then * blank lines will be skipped - this is for compatibility with previous @@ -300,19 +333,24 @@ class QgsDelimitedTextFile */ Status nextLine( QString &buffer, bool skipBlank = false ); + /** Set the next line to read from the file. 
+ */ + bool setNextLineNumber( long nextLineNumber ); + /** Utility routine to add a field to a record, accounting for trimming * and discarding, and maximum field count */ - void appendField( QStringList &record, QString field, bool quoted = false ); // Pointer to the currently selected parser - Status( QgsDelimitedTextFile::*mParser )( QStringList &fields ); + Status( QgsDelimitedTextFile::*mParser )( QString &buffer, QStringList &fields ); QString mFileName; QString mEncoding; QFile *mFile; QTextStream *mStream; + bool mUseWatcher; + QFileSystemWatcher *mWatcher; // Parameters common to parsers bool mDefinitionValid; @@ -333,7 +371,14 @@ class QgsDelimitedTextFile // Information extracted from file QStringList mFieldNames; - int mLineNumber; - int mRecordLineNumber; + long mLineNumber; + long mRecordLineNumber; + long mRecordNumber; + QStringList mCurrentRecord; + bool mHoldCurrentRecord; + // Maximum number of record (ie maximum record number visited) + long mMaxRecordNumber; int mMaxFieldCount; }; + +#endif diff --git a/src/providers/delimitedtext/qgsdelimitedtextprovider.cpp b/src/providers/delimitedtext/qgsdelimitedtextprovider.cpp index 14861ba22641..ef29883e6c5c 100644 --- a/src/providers/delimitedtext/qgsdelimitedtextprovider.cpp +++ b/src/providers/delimitedtext/qgsdelimitedtextprovider.cpp @@ -37,6 +37,7 @@ #include "qgsmessagelog.h" #include "qgsmessageoutput.h" #include "qgsrectangle.h" +#include "qgsspatialindex.h" #include "qgis.h" #include "qgsdelimitedtextsourceselect.h" @@ -46,6 +47,11 @@ static const QString TEXT_PROVIDER_KEY = "delimitedtext"; static const QString TEXT_PROVIDER_DESCRIPTION = "Delimited text data provider"; +// If more than this fraction of records are not in a subset then use an index to +// iterate over records rather than simple iterator and filter. 
+ +static const int SUBSET_ID_THRESHOLD_FACTOR = 10; + QRegExp QgsDelimitedTextProvider::WktPrefixRegexp( "^\\s*(?:\\d+\\s+|SRID\\=\\d+\\;)", Qt::CaseInsensitive ); QRegExp QgsDelimitedTextProvider::WktZMRegexp( "\\s*(?:z|m|zm)(?=\\s*\\()", Qt::CaseInsensitive ); QRegExp QgsDelimitedTextProvider::WktCrdRegexp( "(\\-?\\d+(?:\\.\\d*)?\\s+\\-?\\d+(?:\\.\\d*)?)\\s[\\s\\d\\.\\-]+" ); @@ -54,6 +60,7 @@ QRegExp QgsDelimitedTextProvider::CrdDmsRegexp( "^\\s*(?:([-+nsew])\\s*)?(\\d{1, QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) : QgsVectorDataProvider( uri ) , mFile( 0 ) + , mGeomRep( GeomNone ) , mFieldCount( 0 ) , mXFieldIndex( -1 ) , mYFieldIndex( -1 ) @@ -63,11 +70,15 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) , mXyDms( false ) , mSubsetString( "" ) , mSubsetExpression( 0 ) + , mBuildSubsetIndex( true ) + , mUseSubsetIndex( false ) , mMaxInvalidLines( 50 ) , mShowInvalidLines( true ) , mCrs() , mWkbType( QGis::WKBNoGeometry ) , mGeometryType( QGis::UnknownGeometry ) + , mBuildSpatialIndex( false ) + , mSpatialIndex( 0 ) , mActiveIterator( 0 ) { @@ -77,9 +88,6 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) mFile = new QgsDelimitedTextFile(); mFile->setFromUrl( url ); - QString wktField; - QString xField; - QString yField; QString subset; if ( url.hasQueryItem( "geomType" ) ) @@ -95,13 +103,18 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) { if ( url.hasQueryItem( "wktField" ) ) { - wktField = url.queryItemValue( "wktField" ); + mWktFieldName = url.queryItemValue( "wktField" ); + mGeomRep = GeomAsWkt; + QgsDebugMsg( "wktField is: " + mWktFieldName ); } else if ( url.hasQueryItem( "xField" ) && url.hasQueryItem( "yField" ) ) { + mGeomRep = GeomAsXy; mGeometryType = QGis::Point; - xField = url.queryItemValue( "xField" ); - yField = url.queryItemValue( "yField" ); + mXFieldName = url.queryItemValue( "xField" ); + mYFieldName = url.queryItemValue( "yField" ); + QgsDebugMsg( "xField is: 
" + mXFieldName ); + QgsDebugMsg( "yField is: " + mYFieldName ); if ( url.hasQueryItem( "xyDms" ) ) { @@ -120,10 +133,15 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) if ( url.hasQueryItem( "crs" ) ) mCrs.createFromString( url.queryItemValue( "crs" ) ); + if ( url.hasQueryItem( "subsetIndex" ) ) + { + mBuildSubsetIndex = ! url.queryItemValue( "subsetIndex" ).toLower().startsWith( "n" ); + } - QgsDebugMsg( "wktField is: " + wktField ); - QgsDebugMsg( "xField is: " + xField ); - QgsDebugMsg( "yField is: " + yField ); + if ( url.hasQueryItem( "spatialIndex" ) ) + { + mBuildSpatialIndex = ! url.queryItemValue( "spatialIndex" ).toLower().startsWith( "n" ); + } if ( url.hasQueryItem( "subset" ) ) { @@ -133,14 +151,87 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) if ( url.hasQueryItem( "quiet" ) ) mShowInvalidLines = false; + // Do an initial scan of the file to determine field names, types, + // geometry type (for Wkt), extents, etc. Parameter value subset.isEmpty() + // avoid redundant building indexes if we will be building a subset string, + // in which case indexes will be rebuilt. + + scanFile( subset.isEmpty() ); + + if ( ! 
subset.isEmpty() ) + { + setSubsetString( subset ); + } +} + +void QgsDelimitedTextProvider::resetCachedSubset() +{ + mCachedSubsetString=QString(); + mCachedUseSubsetIndex = false; + mCachedUseSpatialIndex = false; +} + + +void QgsDelimitedTextProvider::resetIndexes() +{ + resetCachedSubset(); + mUseSubsetIndex = false; + mUseSpatialIndex = false; + + mSubsetIndex.clear(); + if ( mSpatialIndex ) delete mSpatialIndex; + mSpatialIndex = 0; + if ( mBuildSpatialIndex && mGeomRep != GeomNone ) mSpatialIndex = new QgsSpatialIndex(); +} + +bool QgsDelimitedTextProvider::createSpatialIndex() +{ + if ( mBuildSpatialIndex ) return true; // Already built + if ( mGeomRep == GeomNone ) return false; // Cannot build index - no geometries + + // Ok, set the spatial index option, set the Uri parameter so that the index is + // rebuilt when theproject is reloaded, and rescan the file to populate the index + + mBuildSpatialIndex = true; + setUriParameter( "spatialIndex", "yes" ); + rescanFile(); + return true; +} + +// buildIndexes parameter of scanFile is to allow for potential rescan - if using +// subset string then rescan follows this to determine subset extents etc. +// Done this way as subset requires fields to be defined, which they are not +// until initial file scan is complete. +// +// Although at this point the subset expression does not apply (if one is defined) +// we still consider a subset index, as this also applies for implicit subsets +// due to filtering on geometry validity. + +void QgsDelimitedTextProvider::scanFile( bool buildIndexes ) +{ + QStringList messages; + // assume the layer is invalid until proven otherwise - clearInvalidLines(); + mValid = false; + + clearInvalidLines(); + + // Initiallize indexes + + resetIndexes(); + bool buildSpatialIndex = buildIndexes && mSpatialIndex != 0; + + // No point building a subset index if there is no geometry, as all + // records will be included. 
+ + bool buildSubsetIndex = buildIndexes && mBuildSubsetIndex && mGeomRep != GeomNone; + if ( ! mFile->isValid() ) { // uri is invalid so the layer must be too... - QStringList messages; - messages.append( "File cannot be opened or delimiter parameters are not valid" ); + + messages.append( tr( "File cannot be opened or delimiter parameters are not valid" ) ); reportErrors( messages ); QgsDebugMsg( "Delimited text source invalid - filename or delimiter parameters" ); return; @@ -150,21 +241,32 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) // file has a header row and process accordingly. Caller should make // sure that the delimited file is properly formed. - mWktFieldIndex = mFile->fieldIndex( wktField ); - mXFieldIndex = mFile->fieldIndex( xField ); - mYFieldIndex = mFile->fieldIndex( yField ); - - if ( mWktFieldIndex >= 0 ) + if ( mGeomRep == GeomAsWkt ) { - QgsDebugMsg( "Found wkt field: " + QString::number( mWktFieldIndex + 1 ) ); + mWktFieldIndex = mFile->fieldIndex( mWktFieldName ); + if ( mWktFieldIndex < 0 ) + { + messages.append( tr( "%0 field %1 is not defined in delimited text file" ).arg( "Wkt" ).arg( mWktFieldName ) ); + } } - if ( mXFieldIndex >= 0 ) + else if ( mGeomRep == GeomAsXy ) { - QgsDebugMsg( "Found X field: " + QString::number( mXFieldIndex + 1 ) ); + mXFieldIndex = mFile->fieldIndex( mXFieldName ); + mYFieldIndex = mFile->fieldIndex( mYFieldName ); + if ( mXFieldIndex < 0 ) + { + messages.append( tr( "%0 field %1 is not defined in delimited text file" ).arg( "X" ).arg( mXFieldName ) ); + } + if ( mYFieldIndex < 0 ) + { + messages.append( tr( "%0 field %1 is not defined in delimited text file" ).arg( "Y" ).arg( mYFieldName ) ); + } } - if ( mYFieldIndex >= 0 ) + if ( messages.size() > 0 ) { - QgsDebugMsg( "Found Y field: " + QString::number( mYFieldIndex + 1 ) ); + reportErrors( messages ); + QgsDebugMsg( "Delimited text source invalid - missing geometry fields" ); + return; } // Scan the entire file to determine 
@@ -173,13 +275,15 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) // should match the code in QgsDelimitedTextFeatureIterator // 3) the geometric extents of the layer // 4) the type of each field + // + // Also build subset and spatial indexes. QStringList parts; - int nEmptyRecords = 0; - int nBadFormatRecords = 0; - int nIncompatibleGeometry = 0; - int nInvalidGeometry = 0; - int nEmptyGeometry = 0; + long nEmptyRecords = 0; + long nBadFormatRecords = 0; + long nIncompatibleGeometry = 0; + long nInvalidGeometry = 0; + long nEmptyGeometry = 0; mNumberFeatures = 0; mExtent = QgsRectangle(); @@ -207,7 +311,7 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) // Check geometries are valid bool geomValid = true; - if ( mWktFieldIndex >= 0 ) + if ( mGeomRep == GeomAsWkt ) { if ( mWktFieldIndex >= parts.size() || parts[mWktFieldIndex].isEmpty() ) { @@ -248,6 +352,16 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) QgsRectangle bbox( geom->boundingBox() ); mExtent.combineExtentWith( &bbox ); } + if ( buildSpatialIndex ) + { + QgsFeature f; + f.setFeatureId( mFile->recordId() ); + f.setGeometry( geom ); + mSpatialIndex->insertFeature( f ); + // Feature now has ownership of geometry, so set to null + // here to avoid deleting twice. + geom = 0; + } } else { @@ -255,7 +369,7 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) geomValid = false; } } - delete geom; + if ( geom ) delete geom; } else { @@ -265,7 +379,7 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) } } } - else if ( mXFieldIndex >= 0 && mYFieldIndex >= 0 ) + else if ( mGeomRep == GeomAsXy ) { // Get the x and y values, first checking to make sure they // aren't null. 
@@ -296,6 +410,13 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) mGeometryType = QGis::Point; } mNumberFeatures++; + if ( buildSpatialIndex ) + { + QgsFeature f; + f.setFeatureId( mFile->recordId() ); + f.setGeometry( QgsGeometry::fromPoint( pt ) ); + mSpatialIndex->insertFeature( f ); + } } else { @@ -313,6 +434,9 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) if ( ! geomValid ) continue; + if ( buildSubsetIndex ) mSubsetIndex.append( mFile->recordId() ); + + // If we are going to use this record, then assess the potential types of each colum for ( int i = 0; i < parts.size(); i++ ) @@ -399,14 +523,51 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( QString uri ) warnings.append( tr( "%1 records discarded due to incompatible geometry types" ).arg( nIncompatibleGeometry ) ); reportErrors( warnings ); - mValid = mGeometryType != QGis::UnknownGeometry; - if ( ! subset.isEmpty() ) + // Decide whether to use subset ids to index records rather than simple iteration through all + // If more than 10% of records are being skipped, then use index. (Not based on any experimentation, + // could do with some analysis?) + + if ( buildSubsetIndex ) { - setSubsetString( subset ); + long recordCount = mFile->recordCount(); + recordCount -= recordCount / SUBSET_ID_THRESHOLD_FACTOR; + mUseSubsetIndex = mSubsetIndex.size() < recordCount; + if ( ! 
mUseSubsetIndex ) mSubsetIndex = QList(); } + + mUseSpatialIndex = buildSpatialIndex; + + mValid = mGeometryType != QGis::UnknownGeometry; + + // If it is valid, then watch for changes to the file + connect( mFile, SIGNAL( fileUpdated() ), this, SLOT( onFileUpdated() ) ); + + } +QgsDelimitedTextProvider::~QgsDelimitedTextProvider() +{ + if ( mActiveIterator ) + mActiveIterator->close(); + + if ( mFile ) + { + delete mFile; + mFile = 0; + } + + if ( mSubsetExpression ) + { + delete mSubsetExpression; + mSubsetExpression = 0; + } + if ( mSpatialIndex ) + { + delete mSpatialIndex; + mSpatialIndex = 0; + } +} QgsGeometry *QgsDelimitedTextProvider::geomFromWkt( QString &sWkt ) { @@ -500,24 +661,6 @@ bool QgsDelimitedTextProvider::pointFromXY( QString &sX, QString &sY, QgsPoint & } -QgsDelimitedTextProvider::~QgsDelimitedTextProvider() -{ - if ( mActiveIterator ) - mActiveIterator->close(); - - if ( mFile ) - { - delete mFile; - mFile = 0; - } - - if ( mSubsetExpression ) - { - delete mSubsetExpression; - mSubsetExpression = 0; - } -} - QString QgsDelimitedTextProvider::storageType() const { @@ -553,7 +696,7 @@ void QgsDelimitedTextProvider::recordInvalidLine( QString message ) { if ( mInvalidLines.size() < mMaxInvalidLines ) { - mInvalidLines.append( message.arg( mFile->recordLineNumber() ) ); + mInvalidLines.append( message.arg( mFile->recordId() ) ); } else { @@ -561,7 +704,7 @@ void QgsDelimitedTextProvider::recordInvalidLine( QString message ) } } -void QgsDelimitedTextProvider::reportErrors( QStringList messages ) +void QgsDelimitedTextProvider::reportErrors( QStringList messages , bool showDialog ) { if ( !mInvalidLines.isEmpty() || ! messages.isEmpty() ) { @@ -581,7 +724,7 @@ void QgsDelimitedTextProvider::reportErrors( QStringList messages ) } // Display errors in a dialog... 
- if ( mShowInvalidLines ) + if ( mShowInvalidLines && showDialog ) { QgsMessageOutput* output = QgsMessageOutput::createMessageOutput(); output->setTitle( tr( "Delimited text file errors" ) ); @@ -610,6 +753,10 @@ void QgsDelimitedTextProvider::reportErrors( QStringList messages ) bool QgsDelimitedTextProvider::setSubsetString( QString subset, bool updateFeatureCount ) { + // If not changing string, then all ok, nothing to do + + if( subset.isNull() ) subset=""; + if( subset == mSubsetString ) return true; bool valid = true; @@ -643,35 +790,134 @@ bool QgsDelimitedTextProvider::setSubsetString( QString subset, bool updateFeatu } } - // if the expression is valid, then reset the subset string and data source Uri if ( valid ) { if ( mSubsetExpression ) delete mSubsetExpression; + QString previousSubset = mSubsetString; mSubsetString = subset; mSubsetExpression = expression; - // Encode the subset string into the data source URI. + // Update the feature count and extents if requested - QUrl url = QUrl::fromEncoded( dataSourceUri().toAscii() ); - if ( url.hasQueryItem( "subset" ) ) url.removeAllQueryItems( "subset" ); - if ( ! subset.isEmpty() ) url.addQueryItem( "subset", subset ); - setDataSourceUri( QString::fromAscii( url.toEncoded() ) ); + // Usage of updateFeatureCount is a bit painful, basically expect that it + // will only be false for a temporary subset, and the original subset + // will be replaced before an update is required. + // + // It appears to be false for a temporary subset string, which is used to + // get some data, and then immediately reset. No point scanning file and + // resetting subset index for this. On the other hand, we don't want to + // lose indexes in this instance, or have to rescan file. So we cache + // the settings until a real subset is required. - // Update the feature count and extents if requested if ( updateFeatureCount ) { - resetDataSummary(); + if( ! 
mCachedSubsetString.isNull() && mSubsetString == mCachedSubsetString ) + { + QgsDebugMsg(QString("DelimitedText: Resetting cached subset string %1").arg(mSubsetString)); + mUseSpatialIndex = mCachedUseSpatialIndex; + mUseSubsetIndex = mCachedUseSubsetIndex; + resetCachedSubset(); + } + else + { + QgsDebugMsg(QString("DelimitedText: Setting new subset string %1").arg(mSubsetString)); + // Reset the subset index + rescanFile(); + // Encode the subset string into the data source URI. + setUriParameter( "subset", subset ); + } + } + else + { + // If not already using temporary subset, then cache the current subset + QgsDebugMsg(QString("DelimitedText: Setting temporary subset string %1").arg(mSubsetString)); + if( mCachedSubsetString.isNull() ) + { + QgsDebugMsg(QString("DelimitedText: Caching previous subset %1").arg(previousSubset)); + mCachedSubsetString=previousSubset; + mCachedUseSpatialIndex = mUseSpatialIndex; + mCachedUseSubsetIndex = mUseSubsetIndex; + } + mUseSubsetIndex = false; + mUseSpatialIndex = false; } } return valid; } -void QgsDelimitedTextProvider::resetDataSummary() +void QgsDelimitedTextProvider::setUriParameter( QString parameter, QString value ) +{ + QUrl url = QUrl::fromEncoded( dataSourceUri().toAscii() ); + if ( url.hasQueryItem( parameter ) ) url.removeAllQueryItems( parameter ); + if ( ! value.isEmpty() ) url.addQueryItem( parameter, value ); + setDataSourceUri( QString::fromAscii( url.toEncoded() ) ); +} + +// rescanFile. Called if something has changed file definition, such as +// selecting a subset, the file has been changed by another program, etc + +void QgsDelimitedTextProvider::rescanFile() { + resetIndexes(); + + bool buildSpatialIndex = mSpatialIndex != 0; + bool buildSubsetIndex = mBuildSubsetIndex && ( mSubsetExpression || mGeomRep != GeomNone ); + + // In case file has been rewritten, check that required fields are still + // valid + + mValid = mFile->isValid(); + if ( ! 
mValid ) return; + + // Open the file and get number of rows, etc. We assume that the + // file has a header row and process accordingly. Caller should make + // sure that the delimited file is properly formed. + + QStringList messages; + + if ( mGeomRep == GeomAsWkt ) + { + mWktFieldIndex = mFile->fieldIndex( mWktFieldName ); + if ( mWktFieldIndex < 0 ) + { + messages.append( tr( "%0 field %1 is not defined in delimited text file" ).arg( "Wkt" ).arg( mWktFieldName ) ); + } + } + else if ( mGeomRep == GeomAsXy ) + { + mXFieldIndex = mFile->fieldIndex( mXFieldName ); + mYFieldIndex = mFile->fieldIndex( mYFieldName ); + if ( mXFieldIndex < 0 ) + { + messages.append( tr( "%0 field %1 is not defined in delimited text file" ).arg( "X" ).arg( mWktFieldName ) ); + } + if ( mYFieldIndex < 0 ) + { + messages.append( tr( "%0 field %1 is not defined in delimited text file" ).arg( "Y" ).arg( mWktFieldName ) ); + } + } + if ( messages.size() > 0 ) + { + reportErrors( messages, false ); + QgsDebugMsg( "Delimited text source invalid on rescan - missing geometry fields" ); + mValid = false; + } + + // Reset the field columns + + for ( int i = 0; i < attributeFields.size(); i++ ) + { + attributeColumns[i] = mFile->fieldIndex( attributeFields[i].name() ); + } + + // Scan through the features in the file + + mSubsetIndex.clear(); + mUseSubsetIndex = false; QgsFeatureIterator fi = getFeatures( QgsFeatureRequest() ); mNumberFeatures = 0; mExtent = QgsRectangle(); @@ -689,53 +935,81 @@ void QgsDelimitedTextProvider::resetDataSummary() QgsRectangle bbox( f.geometry()->boundingBox() ); mExtent.combineExtentWith( &bbox ); } + if ( buildSpatialIndex ) mSpatialIndex->insertFeature( f ); } + if ( buildSubsetIndex ) mSubsetIndex.append(( quintptr ) f.id() ); mNumberFeatures++; } + if ( buildSubsetIndex ) + { + long recordCount = mFile->recordCount(); + recordCount -= recordCount / SUBSET_ID_THRESHOLD_FACTOR; + mUseSubsetIndex = recordCount < mSubsetIndex.size(); + if ( ! 
mUseSubsetIndex ) mSubsetIndex.clear(); + } + + mUseSpatialIndex = buildSpatialIndex; } +void QgsDelimitedTextProvider::onFileUpdated() +{ + QStringList messages; + messages.append( tr( "The file has been updated by another application - reloading" ) ); + reportErrors( messages, false ); + + if ( mActiveIterator ) mActiveIterator->close(); + rescanFile(); +} -bool QgsDelimitedTextProvider::nextFeature( QgsFeature& feature, QgsDelimitedTextFile *file, const QgsFeatureRequest& request ) +bool QgsDelimitedTextProvider::nextFeature( QgsFeature& feature, QgsDelimitedTextFile *file, QgsDelimitedTextFeatureIterator *iterator ) { QStringList tokens; - while ( true ) + + // If the iterator is not scanning the file, then it will have requested a specific + // record, so only need to load that one. + + bool first = true; + bool scanning = iterator->scanningFile(); + + while ( scanning || first ) { + first = false; + // before we do anything else, assume that there's something wrong with // the feature + feature.setValid( false ); QgsDelimitedTextFile::Status status = file->nextRecord( tokens ); if ( status == QgsDelimitedTextFile::RecordEOF ) break; if ( status != QgsDelimitedTextFile::RecordOk ) continue; + // We ignore empty records, such as added randomly by spreadsheets - int fid = file->recordLineNumber(); - if ( request.filterType() == QgsFeatureRequest::FilterFid && fid != request.filterFid() ) continue; if ( recordIsEmpty( tokens ) ) continue; + QgsFeatureId fid = file->recordId(); + while ( tokens.size() < mFieldCount ) tokens.append( QString::null ); QgsGeometry *geom = 0; - // Note: Always need to load geometry even if request has NoGeometry set - // and subset string doesn't need geometry, as only by loading geometry - // do we know that this is a valid record in the data set. 
- if ( mWktFieldIndex >= 0 ) - { - geom = loadGeometryWkt( tokens, request ); - } - else if ( mXFieldIndex >= 0 && mYFieldIndex >= 0 ) - { - geom = loadGeometryXY( tokens, request ); - } + // Load the geometry if required - if ( !geom && mWkbType != QGis::WKBNoGeometry ) + if ( iterator->loadGeometry() ) { - // Already dealt with invalid lines in provider - no need to repeat - // removed code (CC 2013-04-13) ... - // mInvalidLines << line; - // In any case it may be a valid line that is excluded because of - // bounds check... - continue; + if ( mGeomRep == GeomAsWkt ) + { + geom = loadGeometryWkt( tokens, iterator ); + } + else if ( mGeomRep == GeomAsXy ) + { + geom = loadGeometryXY( tokens, iterator ); + } + + if ( ! geom ) + { + continue; + } } // At this point the current feature values are valid @@ -748,10 +1022,12 @@ bool QgsDelimitedTextProvider::nextFeature( QgsFeature& feature, QgsDelimitedTex if ( geom ) feature.setGeometry( geom ); - // If we have subset expression, then ened attributes - if ( ! mSubsetExpression && request.flags() & QgsFeatureRequest::SubsetOfAttributes ) + // If we are testing subset expression, then need all attributes just in case. + // Could be more sophisticated, but probably not worth it! + + if ( iterator->loadSubsetOfAttributes() ) { - const QgsAttributeList& attrs = request.subsetOfAttributes(); + const QgsAttributeList& attrs = iterator->subsetOfAttributes(); for ( QgsAttributeList::const_iterator i = attrs.begin(); i != attrs.end(); ++i ) { int fieldIdx = *i; @@ -764,26 +1040,25 @@ bool QgsDelimitedTextProvider::nextFeature( QgsFeature& feature, QgsDelimitedTex fetchAttribute( feature, idx, tokens ); } - // Are we using a subset expression, if so try and evaluate - // and accept result if passes. 
+ // If the iterator hasn't already filtered out the subset, then do it now - if ( mSubsetExpression ) + if ( iterator->testSubset() ) { QVariant isOk = mSubsetExpression->evaluate( &feature ); if ( mSubsetExpression->hasEvalError() ) continue; if ( ! isOk.toBool() ) continue; } - // We have a good line, so return + // We have a good record, so return return true; - } // !mStream->atEnd() + } return false; } -QgsGeometry* QgsDelimitedTextProvider::loadGeometryWkt( const QStringList& tokens, const QgsFeatureRequest& request ) +QgsGeometry* QgsDelimitedTextProvider::loadGeometryWkt( const QStringList& tokens, QgsDelimitedTextFeatureIterator *iterator ) { QgsGeometry* geom = 0; QString sWkt = tokens[mWktFieldIndex]; @@ -795,7 +1070,7 @@ QgsGeometry* QgsDelimitedTextProvider::loadGeometryWkt( const QStringList& token delete geom; geom = 0; } - if ( geom && !boundsCheck( geom, request ) ) + if ( geom && ! iterator->wantGeometry( geom ) ) { delete geom; geom = 0; @@ -804,47 +1079,20 @@ QgsGeometry* QgsDelimitedTextProvider::loadGeometryWkt( const QStringList& token } -QgsGeometry* QgsDelimitedTextProvider::loadGeometryXY( const QStringList& tokens, const QgsFeatureRequest& request ) +QgsGeometry* QgsDelimitedTextProvider::loadGeometryXY( const QStringList& tokens, QgsDelimitedTextFeatureIterator *iterator ) { QString sX = tokens[mXFieldIndex]; QString sY = tokens[mYFieldIndex]; QgsPoint pt; bool ok = pointFromXY( sX, sY, pt ); - if ( ok && boundsCheck( pt, request ) ) + if ( ok && iterator->wantGeometry( pt ) ) { return QgsGeometry::fromPoint( pt ); } return 0; } -/** - * Check to see if the point is within the selection rectangle - */ -bool QgsDelimitedTextProvider::boundsCheck( const QgsPoint &pt, const QgsFeatureRequest& request ) -{ - // no selection rectangle or geometry => always in the bounds - if ( request.filterType() != QgsFeatureRequest::FilterRect || ( request.flags() & QgsFeatureRequest::NoGeometry ) ) - return true; - - return request.filterRect().contains( 
pt ); -} - -/** - * Check to see if the geometry is within the selection rectangle - */ -bool QgsDelimitedTextProvider::boundsCheck( QgsGeometry *geom, const QgsFeatureRequest& request ) -{ - // no selection rectangle or geometry => always in the bounds - if ( request.filterType() != QgsFeatureRequest::FilterRect || ( request.flags() & QgsFeatureRequest::NoGeometry ) ) - return true; - - if ( request.flags() & QgsFeatureRequest::ExactIntersect ) - return geom->intersects( request.filterRect() ); - else - return geom->boundingBox().intersects( request.filterRect() ); -} - void QgsDelimitedTextProvider::fetchAttribute( QgsFeature& feature, int fieldIdx, const QStringList& tokens ) { @@ -918,7 +1166,7 @@ bool QgsDelimitedTextProvider::isValid() int QgsDelimitedTextProvider::capabilities() const { - return NoCapabilities; + return SelectAtId | CreateSpatialIndex; } diff --git a/src/providers/delimitedtext/qgsdelimitedtextprovider.h b/src/providers/delimitedtext/qgsdelimitedtextprovider.h index ecf5a500a69b..215bf497ac47 100644 --- a/src/providers/delimitedtext/qgsdelimitedtextprovider.h +++ b/src/providers/delimitedtext/qgsdelimitedtextprovider.h @@ -15,9 +15,12 @@ * * ***************************************************************************/ +#ifndef QGSDELIMITEDTEXTPROVIDER_H +#define QGSDELIMITEDTEXTPROVIDER_H #include "qgsvectordataprovider.h" #include "qgscoordinatereferencesystem.h" +#include "qgsdelimitedtextfile.h" #include @@ -29,8 +32,8 @@ class QFile; class QTextStream; class QgsDelimitedTextFeatureIterator; -class QgsDelimitedTextFile; class QgsExpression; +class QgsSpatialIndex; /** \class QgsDelimitedTextProvider @@ -47,7 +50,8 @@ class QgsExpression; * Example uri = "/home/foo/delim.txt?delimiter=|"* * * For detailed information on the uri format see the QGSVectorLayer -* documentation. +* documentation. Note that the interpretation of the URI is split +* between QgsDelimitedTextFile and QgsDelimitedTextProvider. 
* */ @@ -64,6 +68,13 @@ class QgsDelimitedTextProvider : public QgsVectorDataProvider static QRegExp WktPrefixRegexp; static QRegExp CrdDmsRegexp; + enum GeomRepresentationType + { + GeomNone, + GeomAsXy, + GeomAsWkt + }; + QgsDelimitedTextProvider( QString uri = QString() ); virtual ~QgsDelimitedTextProvider(); @@ -102,6 +113,10 @@ class QgsDelimitedTextProvider : public QgsVectorDataProvider */ virtual int capabilities() const; + /** Creates a spatial index on the data + * @return indexCreated Returns true if a spatial index is created + */ + virtual bool createSpatialIndex(); /* Implementation of functions from QgsDataProvider */ @@ -186,23 +201,30 @@ class QgsDelimitedTextProvider : public QgsVectorDataProvider */ bool boundsCheck( QgsGeometry *geom ); + private slots: + + void onFileUpdated(); + private: static QRegExp WktZMRegexp; static QRegExp WktCrdRegexp; + void scanFile( bool buildIndexes ); + void rescanFile(); + void resetCachedSubset(); + void resetIndexes(); void clearInvalidLines(); void recordInvalidLine( QString message ); - void reportErrors( QStringList messages = QStringList() ); + void reportErrors( QStringList messages = QStringList(), bool showDialog = true ); void resetStream(); bool recordIsEmpty( QStringList &record ); - bool nextFeature( QgsFeature& feature, QgsDelimitedTextFile *file, const QgsFeatureRequest& request ); - QgsGeometry* loadGeometryWkt( const QStringList& tokens, const QgsFeatureRequest& request ); - QgsGeometry* loadGeometryXY( const QStringList& tokens, const QgsFeatureRequest& request ); - bool boundsCheck( const QgsPoint &pt, const QgsFeatureRequest& request ); - bool boundsCheck( QgsGeometry *geom, const QgsFeatureRequest& request ); + bool nextFeature( QgsFeature& feature, QgsDelimitedTextFile *file, QgsDelimitedTextFeatureIterator *iterator ); + QgsGeometry* loadGeometryWkt( const QStringList& tokens, QgsDelimitedTextFeatureIterator *iterator ); + QgsGeometry* loadGeometryXY( const QStringList& tokens, 
QgsDelimitedTextFeatureIterator *iterator ); void fetchAttribute( QgsFeature& feature, int fieldIdx, const QStringList& tokens ); - void resetDataSummary(); + void setUriParameter( QString parameter, QString value ); + bool setNextFeatureId( qint64 fid ) { return mFile->setNextRecordId( (long) fid ); } QgsGeometry *geomFromWkt( QString &sWkt ); @@ -216,10 +238,15 @@ class QgsDelimitedTextProvider : public QgsVectorDataProvider QgsDelimitedTextFile *mFile; // Fields + GeomRepresentationType mGeomRep; QList attributeColumns; QgsFields attributeFields; int mFieldCount; // Note: this includes field count for wkt field + QString mWktFieldName; + QString mXFieldName; + QString mYFieldName; + int mXFieldIndex; int mYFieldIndex; int mWktFieldIndex; @@ -246,7 +273,12 @@ class QgsDelimitedTextProvider : public QgsVectorDataProvider bool mXyDms; QString mSubsetString; + QString mCachedSubsetString; QgsExpression *mSubsetExpression; + bool mBuildSubsetIndex; + QList mSubsetIndex; + bool mUseSubsetIndex; + bool mCachedUseSubsetIndex; //! 
Storage for any lines in the file that couldn't be loaded int mMaxInvalidLines; @@ -270,6 +302,14 @@ class QgsDelimitedTextProvider : public QgsVectorDataProvider QGis::WkbType mWkbType; QGis::GeometryType mGeometryType; + // Spatial index + bool mBuildSpatialIndex; + bool mUseSpatialIndex; + bool mCachedUseSpatialIndex; + QgsSpatialIndex *mSpatialIndex; + friend class QgsDelimitedTextFeatureIterator; QgsDelimitedTextFeatureIterator* mActiveIterator; }; + +#endif diff --git a/tests/src/python/test_qgsdelimitedtextprovider.py b/tests/src/python/test_qgsdelimitedtextprovider.py index ea831fce95d1..8a737f32e79d 100644 --- a/tests/src/python/test_qgsdelimitedtextprovider.py +++ b/tests/src/python/test_qgsdelimitedtextprovider.py @@ -12,9 +12,6 @@ # This will get replaced with a git SHA1 when you do a git archive __revision__ = '$Format:%H$' -rebuildTests = False -#rebuildTests = True - # This module provides unit test for the delimtied text provider. It uses data files in # the testdata/delimitedtext directory. 
# @@ -26,10 +23,17 @@ # # To recreate all tests, set rebuildTests to true +import os; import os.path; import re +import tempfile +import inspect +import test_qgsdelimitedtextprovider_wanted as want + +rebuildTests = 'REBUILD_DELIMITED_TEXT_TESTS' in os.environ; from PyQt4.QtCore import (QVariant, + QCoreApplication, QUrl, QObject, QString, @@ -94,6 +98,8 @@ def layerData( layer, request={}, offset=0 ): if request: if 'exact' in request and request['exact']: fr.setFlags(QgsFeatureRequest.ExactIntersect) + if 'nogeom' in request and request['nogeom']: + fr.setFlags(QgsFeatureRequest.NoGeometry) if 'fid' in request: fr.setFilterFid( request['fid'] ) elif 'extents' in request: @@ -118,16 +124,16 @@ def layerData( layer, request={}, offset=0 ): description = fielddata[fields[1]] fielddata['id']=id fielddata['description']=description - if 'id' not in fields: fields.insert(0,'id') - if 'description' not in fields: fields.insert(1,'description') data[f.id()+offset]=fielddata + if 'id' not in fields: fields.insert(0,'id') + if 'description' not in fields: fields.insert(1,'description') fields.append(fidkey) fields.append(geomkey) return fields, data # Retrieve the data for a delimited text url -def delimitedTextData( filename, requests, **params ): +def delimitedTextData( testname, filename, requests, verbose, **params ): # Create a layer for the specified file and query parameters # and return the data for the layer (fields, data) @@ -138,42 +144,44 @@ def delimitedTextData( filename, requests, **params ): for k in params.keys(): url.addQueryItem(k,params[k]) urlstr = url.toString() + log=[] with MessageLogger('DelimitedText') as logger: + if verbose: + print testname layer = QgsVectorLayer(urlstr,'test','delimitedtext') + uri = unicode(layer.dataProvider().dataSourceUri()) + if verbose: + print uri + basename = os.path.basename(filepath) + if not basename.startswith('test'): + basename='file' + uri = uri.replace(filepath,basename) fields = [] data = {} if 
layer.isValid(): for nr,r in enumerate(requests): + if verbose: + print "Processing request",nr+1,repr(r) + if callable(r): + r( layer ) + continue rfields,rdata = layerData(layer,r,nr*1000) if len(rfields) > len(fields): fields = rfields data.update(rdata) - log=[] + if not rdata: + log.append("Request "+str(nr)+" did not return any data") for msg in logger.messages(): log.append(msg.replace(filepath,'file')) - uri = unicode(layer.dataProvider().dataSourceUri()) - uri = uri.replace(filepath,'file') return dict( fields=fields, data=data, log=log, uri=uri) -def createTest( description, filename, requests, **params ): - # Routine to write a new test for a file. Need to check the output is right - # first of course! - import inspect - test=inspect.stack()[1][3]; - result = delimitedTextData( filename, requests, **params ) - print - print " def {0}(self):".format(test) - print " description={0}".format(repr(description)) - print " filename={0}".format(repr(filename)) - print " params={0}".format(repr(params)) - print " requests={0}".format(repr(requests)).replace("{","\n {") - print " if rebuildTests:" - print " createTest(description,filename,requests,**params)" - print " assert False,\"Set rebuildTests to False to run delimited text tests\"" - +def printWanted( testname, result ): + # Routine to export the result as a function definition + print + print "def {0}():".format(testname) data=result['data'] log=result['log'] fields=result['fields'] - prefix=' ' + prefix=' ' # Dump the data for a layer - used to construct unit tests print prefix+"wanted={}" @@ -191,7 +199,7 @@ def createTest( description, filename, requests, **params ): for msg in log: print prefix+' '+repr(msg)+',' print prefix+' ]' - print ' runTest(description,wanted,filename,requests,**params)' + print ' return wanted' print @@ -200,7 +208,7 @@ def checkWktEqual( wkt1, wkt2 ): # Slightly complex to allow for small numeric difference in # coordinates... 
if wkt1 == wkt2: return True - # Use regex split with capture gropu to split into text and numbers + # Use regex split with capture group to split into text and numbers numberre=re.compile(r'(\-?\d+(?:\.\d+)?)') p1=numberre.split(wkt1) p2=numberre.split(wkt2) @@ -234,10 +242,21 @@ def recordDifference( record1, record2 ): return "Output contains extra field {0} is missing".format(k) return '' -def runTest( name, wanted, file, requests, **params ): - print "Running test:",name +def runTest( file, requests, **params ): + testname=inspect.stack()[1][3]; + verbose = not rebuildTests + if verbose: + print "Running test:",testname + result = delimitedTextData( testname, file, requests, verbose, **params ) + if rebuildTests: + printWanted( testname, result ) + assert False,"Test not run - being rebuilt" + try: + wanted = eval('want.{0}()'.format(testname)) + except: + printWanted( testname, result ) + assert False,"Test results not available for {0}".format(testname) - result = delimitedTextData( file, requests, **params ) data = result['data'] log = result['log'] failures = [] @@ -245,7 +264,7 @@ def runTest( name, wanted, file, requests, **params ): msg = "Layer Uri ({0}) doesn't match expected ({1})".format( result['uri'],wanted['uri']) print ' '+msg - falures.append(msg) + failures.append(msg) wanted_data = wanted['data'] for id in sorted(wanted_data.keys()): wrec = wanted_data[id] @@ -267,20 +286,22 @@ def runTest( name, wanted, file, requests, **params ): for l in log: if l in log_wanted: common.append(l) - for l in common: - log_wanted.remove(l) - log.remove(l) for l in log_wanted: - msg='Missing log message: '+l - print ' '+msg - failures.append(msg) - + if l not in common: + msg='Missing log message: '+l + print ' '+msg + failures.append(msg) for l in log: - msg='Extra log message: '+l - print ' '+msg - failures.append(msg) - if len(log)==0 and len(log_wanted)==0: + if l not in common: + msg='Extra log message: '+l + print ' '+msg + failures.append(msg) + if 
len(log)==len(common) and len(log_wanted)==len(common): print ' Message log correct: Passed' + + if failures: + printWanted( testname, result ) + assert len(failures) == 0,"\n".join(failures) class TestQgsDelimitedTextProvider(TestCase): @@ -290,1452 +311,309 @@ def test_001_provider_defined( self ): metadata = registry.providerMetadata('delimitedtext') assert metadata != None, "Delimited text provider is not installed" -#START - def test_002_load_csv_file(self): - description='CSV file parsing' + # CSV file parsing filename='test.csv' params={'geomType': 'none', 'type': 'csv'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&type=csv' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Basic unquoted record', - 'data': u'Some data', - 'info': u'Some info', - 'field_5': u'', - '#fid': 2L, - '#geometry': 'None', - }, - 3L: { - 'id': u'2', - 'description': u'Quoted field', - 'data': u'Quoted data', - 'info': u'Unquoted', - 'field_5': u'', - '#fid': 3L, - '#geometry': 'None', - }, - 4L: { - 'id': u'3', - 'description': u'Escaped quotes', - 'data': u'Quoted "citation" data', - 'info': u'Unquoted', - 'field_5': u'', - '#fid': 4L, - '#geometry': 'None', - }, - 5L: { - 'id': u'4', - 'description': u'Quoted newlines', - 'data': u'Line 1\nLine 2\n\nLine 4', - 'info': u'No data', - 'field_5': u'', - '#fid': 5L, - '#geometry': 'None', - }, - 9L: { - 'id': u'5', - 'description': u'Extra fields', - 'data': u'data', - 'info': u'info', - 'field_5': u'message', - '#fid': 9L, - '#geometry': 'None', - }, - 10L: { - 'id': u'6', - 'description': u'Missing fields', - 'data': u'', - 'info': u'', - 'field_5': u'', - '#fid': 10L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_002a_field_naming(self): - 
description='Management of missing/duplicate/invalid field names' + def test_003_field_naming(self): + # Management of missing/duplicate/invalid field names filename='testfields.csv' params={'geomType': 'none', 'type': 'csv'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&type=csv' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Generation of field names', - 'data': u'Some data', - 'field_4': u'Some info', - 'data_2': u'', - 'field_6': u'', - 'field_7': u'', - 'field_3_1': u'', - 'data_1': u'', - 'field_10': u'', - 'field_11': u'', - 'field_12': u'last data', - '#fid': 2L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_002b_max_fields(self): - description='Limiting maximum number of fields' + def test_004_max_fields(self): + # Limiting maximum number of fields filename='testfields.csv' params={'geomType': 'none', 'maxFields': '7', 'type': 'csv'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&maxFields=7&type=csv' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Generation of field names', - 'data': u'Some data', - 'field_4': u'Some info', - 'data_1': u'', - 'field_6': u'', - 'field_7': u'', - '#fid': 2L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) + runTest(filename,requests,**params) - - def test_003_load_whitespace(self): - description='Whitespace file parsing' + def test_005_load_whitespace(self): + # Whitespace file parsing filename='test.space' params={'geomType': 'none', 'type': 'whitespace'} requests=None - if rebuildTests: - 
createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&type=whitespace' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Simple_whitespace_file', - 'data': u'data1', - 'info': u'info1', - 'field_5': u'', - 'field_6': u'', - '#fid': 2L, - '#geometry': 'None', - }, - 3L: { - 'id': u'2', - 'description': u'Whitespace_at_start_of_line', - 'data': u'data2', - 'info': u'info2', - 'field_5': u'', - 'field_6': u'', - '#fid': 3L, - '#geometry': 'None', - }, - 4L: { - 'id': u'3', - 'description': u'Tab_whitespace', - 'data': u'data3', - 'info': u'info3', - 'field_5': u'', - 'field_6': u'', - '#fid': 4L, - '#geometry': 'None', - }, - 5L: { - 'id': u'4', - 'description': u'Multiple_whitespace_characters', - 'data': u'data4', - 'info': u'info4', - 'field_5': u'', - 'field_6': u'', - '#fid': 5L, - '#geometry': 'None', - }, - 6L: { - 'id': u'5', - 'description': u'Extra_fields', - 'data': u'data5', - 'info': u'info5', - 'field_5': u'message5', - 'field_6': u'rubbish5', - '#fid': 6L, - '#geometry': 'None', - }, - 7L: { - 'id': u'6', - 'description': u'Missing_fields', - 'data': u'', - 'info': u'', - 'field_5': u'', - 'field_6': u'', - '#fid': 7L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_004_quote_escape(self): - description='Quote and escape file parsing' + def test_006_quote_escape(self): + # Quote and escape file parsing filename='test.pipe' params={'geomType': 'none', 'quote': '"', 'delimiter': '|', 'escape': '\\'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none"e="&delimiter=|&escape=\\' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Using pipe 
delimiter', - 'data': u'data 1', - 'info': u'info 1', - 'field_5': u'', - 'field_6': u'', - '#fid': 2L, - '#geometry': 'None', - }, - 3L: { - 'id': u'2', - 'description': u'Using backslash escape on pipe', - 'data': u'data 2 | piped', - 'info': u'info2', - 'field_5': u'', - 'field_6': u'', - '#fid': 3L, - '#geometry': 'None', - }, - 4L: { - 'id': u'3', - 'description': u'Backslash escaped newline', - 'data': u'data3 \nline2 \nline3', - 'info': u'info3', - 'field_5': u'', - 'field_6': u'', - '#fid': 4L, - '#geometry': 'None', - }, - 7L: { - 'id': u'4', - 'description': u'Empty field', - 'data': u'', - 'info': u'info4', - 'field_5': u'', - 'field_6': u'', - '#fid': 7L, - '#geometry': 'None', - }, - 8L: { - 'id': u'5', - 'description': u'Quoted field', - 'data': u'More | piped data', - 'info': u'info5', - 'field_5': u'', - 'field_6': u'', - '#fid': 8L, - '#geometry': 'None', - }, - 9L: { - 'id': u'6', - 'description': u'Escaped quote', - 'data': u'Field "citation" ', - 'info': u'info6', - 'field_5': u'', - 'field_6': u'', - '#fid': 9L, - '#geometry': 'None', - }, - 10L: { - 'id': u'7', - 'description': u'Missing fields', - 'data': u'', - 'info': u'', - 'field_5': u'', - 'field_6': u'', - '#fid': 10L, - '#geometry': 'None', - }, - 11L: { - 'id': u'8', - 'description': u'Extra fields', - 'data': u'data8', - 'info': u'info8', - 'field_5': u'message8', - 'field_6': u'more', - '#fid': 11L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_005_multiple_quote(self): - description='Multiple quote and escape characters' + def test_007_multiple_quote(self): + # Multiple quote and escape characters filename='test.quote' params={'geomType': 'none', 'quote': '\'"', 'type': 'csv', 'escape': '"\''} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - 
wanted['uri']=u'file://file?geomType=none&quote=\'"&type=csv&escape="\'' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Multiple quotes 1', - 'data': u'Quoted,data1', - 'info': u'info1', - '#fid': 2L, - '#geometry': 'None', - }, - 3L: { - 'id': u'2', - 'description': u'Multiple quotes 2', - 'data': u'Quoted,data2', - 'info': u'info2', - '#fid': 3L, - '#geometry': 'None', - }, - 4L: { - 'id': u'3', - 'description': u'Leading and following whitespace', - 'data': u'Quoted, data3', - 'info': u'info3', - '#fid': 4L, - '#geometry': 'None', - }, - 5L: { - 'id': u'4', - 'description': u'Embedded quotes 1', - 'data': u'Quoted \'\'"\'\' data4', - 'info': u'info4', - '#fid': 5L, - '#geometry': 'None', - }, - 6L: { - 'id': u'5', - 'description': u'Embedded quotes 2', - 'data': u'Quoted \'""\' data5', - 'info': u'info5', - '#fid': 6L, - '#geometry': 'None', - }, - 10L: { - 'id': u'9', - 'description': u'Final record', - 'data': u'date9', - 'info': u'info9', - '#fid': 10L, - '#geometry': 'None', - }, - } - wanted['log']=[ - u'Errors in file file', - u'3 records discarded due to invalid format', - u'The following lines were not loaded into QGIS due to errors:', - u'Invalid record format at line 7', - u'Invalid record format at line 8', - u'Invalid record format at line 9', - ] - runTest(description,wanted,filename,requests,**params) + runTest(filename,requests,**params) - - def test_005a_badly_formed_quotes(self): - description='Badly formed quoted fields' + def test_008_badly_formed_quotes(self): + # Badly formed quoted fields filename='test.badquote' params={'geomType': 'none', 'quote': '"', 'type': 'csv', 'escape': '"'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&quote="&type=csv&escape="' - wanted['data']={ - 4L: { - 'id': u'3', - 'description': u'Recovered after unclosed quore', - 'data': u'Data ok', - 
'info': u'inf3', - '#fid': 4L, - '#geometry': 'None', - }, - } - wanted['log']=[ - u'Errors in file file', - u'2 records discarded due to invalid format', - u'The following lines were not loaded into QGIS due to errors:', - u'Invalid record format at line 2', - u'Invalid record format at line 5', - ] - runTest(description,wanted,filename,requests,**params) + runTest(filename,requests,**params) - - def test_007_skip_lines(self): - description='Skip lines' + def test_009_skip_lines(self): + # Skip lines filename='test2.csv' params={'geomType': 'none', 'useHeader': 'no', 'type': 'csv', 'skipLines': '2'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&skipLines=2&type=csv&useHeader=no' - wanted['data']={ - 3L: { - 'id': u'3', - 'description': u'Less data', - 'field_1': u'3', - 'field_2': u'Less data', - 'field_3': u'data3', - '#fid': 3L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_008_read_coordinates(self): - description='Skip lines' + def test_010_read_coordinates(self): + # Reading point geometry from X and Y coordinate fields filename='testpt.csv' params={'yField': 'geom_y', 'xField': 'geom_x', 'type': 'csv'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?yField=geom_y&xField=geom_x&type=csv' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Basic point', - 'geom_x': u'10', - 'geom_y': u'20', - '#fid': 2L, - '#geometry': 'POINT(10.0 20.0)', - }, - 3L: { - 'id': u'2', - 'description': u'Integer point', - 'geom_x': u'11', - 'geom_y': u'22', - '#fid': 3L, - '#geometry': 'POINT(11.0 22.0)', - }, - 5L: { - 'id': u'4', - 'description': u'Final point', - 'geom_x': u'13', - 
'geom_y': u'23', - '#fid': 5L, - '#geometry': 'POINT(13.0 23.0)', - }, - } - wanted['log']=[ - u'Errors in file file', - u'1 records discarded due to invalid geometry definitions', - u'The following lines were not loaded into QGIS due to errors:', - u'Invalid X or Y fields at line 4', - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_009_read_wkt(self): - description='Reading WKT geometry field' + def test_011_read_wkt(self): + # Reading WKT geometry field filename='testwkt.csv' params={'delimiter': '|', 'type': 'csv', 'wktField': 'geom_wkt'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?delimiter=|&type=csv&wktField=geom_wkt' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Point wkt', - '#fid': 2L, - '#geometry': 'POINT(10.0 20.0)', - }, - 3L: { - 'id': u'2', - 'description': u'Multipoint wkt', - '#fid': 3L, - '#geometry': 'MULTIPOINT(10.0 20.0, 11.0 21.0)', - }, - 9L: { - 'id': u'8', - 'description': u'EWKT prefix', - '#fid': 9L, - '#geometry': 'POINT(10.0 10.0)', - }, - 10L: { - 'id': u'9', - 'description': u'Informix prefix', - '#fid': 10L, - '#geometry': 'POINT(10.0 10.0)', - }, - 11L: { - 'id': u'10', - 'description': u'Measure in point', - '#fid': 11L, - '#geometry': 'POINT(10.0 20.0)', - }, - } - wanted['log']=[ - u'Errors in file file', - u'1 records discarded due to invalid geometry definitions', - u'7 records discarded due to incompatible geometry types', - u'The following lines were not loaded into QGIS due to errors:', - u'Invalid WKT at line 8', - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_010_read_wkt_point(self): - description='Read WKT points' + def test_012_read_wkt_point(self): + # Read WKT points filename='testwkt.csv' params={'geomType': 'point', 
'delimiter': '|', 'type': 'csv', 'wktField': 'geom_wkt'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=point&delimiter=|&type=csv&wktField=geom_wkt' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Point wkt', - '#fid': 2L, - '#geometry': 'POINT(10.0 20.0)', - }, - 3L: { - 'id': u'2', - 'description': u'Multipoint wkt', - '#fid': 3L, - '#geometry': 'MULTIPOINT(10.0 20.0, 11.0 21.0)', - }, - 9L: { - 'id': u'8', - 'description': u'EWKT prefix', - '#fid': 9L, - '#geometry': 'POINT(10.0 10.0)', - }, - 10L: { - 'id': u'9', - 'description': u'Informix prefix', - '#fid': 10L, - '#geometry': 'POINT(10.0 10.0)', - }, - 11L: { - 'id': u'10', - 'description': u'Measure in point', - '#fid': 11L, - '#geometry': 'POINT(10.0 20.0)', - }, - } - wanted['log']=[ - u'Errors in file file', - u'1 records discarded due to invalid geometry definitions', - u'7 records discarded due to incompatible geometry types', - u'The following lines were not loaded into QGIS due to errors:', - u'Invalid WKT at line 8', - ] - runTest(description,wanted,filename,requests,**params) + runTest(filename,requests,**params) - - def test_011_read_wkt_line(self): - description='Read WKT linestrings' + def test_013_read_wkt_line(self): + # Read WKT linestrings filename='testwkt.csv' params={'geomType': 'line', 'delimiter': '|', 'type': 'csv', 'wktField': 'geom_wkt'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=line&delimiter=|&type=csv&wktField=geom_wkt' - wanted['data']={ - 4L: { - 'id': u'3', - 'description': u'Linestring wkt', - '#fid': 4L, - '#geometry': 'LINESTRING(10.0 20.0, 11.0 21.0)', - }, - 5L: { - 'id': u'4', - 'description': u'Multiline string wkt', - '#fid': 5L, - 
'#geometry': 'MULTILINESTRING((10.0 20.0, 11.0 21.0), (20.0 30.0, 21.0 31.0))', - }, - 12L: { - 'id': u'11', - 'description': u'Measure in line', - '#fid': 12L, - '#geometry': 'LINESTRING(10.0 20.0, 11.0 21.0)', - }, - 13L: { - 'id': u'12', - 'description': u'Z in line', - '#fid': 13L, - '#geometry': 'LINESTRING(10.0 20.0, 11.0 21.0)', - }, - 14L: { - 'id': u'13', - 'description': u'Measure and Z in line', - '#fid': 14L, - '#geometry': 'LINESTRING(10.0 20.0, 11.0 21.0)', - }, - } - wanted['log']=[ - u'Errors in file file', - u'1 records discarded due to invalid geometry definitions', - u'7 records discarded due to incompatible geometry types', - u'The following lines were not loaded into QGIS due to errors:', - u'Invalid WKT at line 8', - ] - runTest(description,wanted,filename,requests,**params) + runTest(filename,requests,**params) - - def test_012_read_wkt_polygon(self): - description='Read WKT polygons' + def test_014_read_wkt_polygon(self): + # Read WKT polygons filename='testwkt.csv' params={'geomType': 'polygon', 'delimiter': '|', 'type': 'csv', 'wktField': 'geom_wkt'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=polygon&delimiter=|&type=csv&wktField=geom_wkt' - wanted['data']={ - 6L: { - 'id': u'5', - 'description': u'Polygon wkt', - '#fid': 6L, - '#geometry': 'POLYGON((10.0 10.0,10.0 20.0,20.0 20.0,20.0 10.0,10.0 10.0),(14.0 14.0,14.0 16.0,16.0 16.0,14.0 14.0))', - }, - 7L: { - 'id': u'6', - 'description': u'MultiPolygon wkt', - '#fid': 7L, - '#geometry': 'MULTIPOLYGON(((10.0 10.0,10.0 20.0,20.0 20.0,20.0 10.0,10.0 10.0),(14.0 14.0,14.0 16.0,16.0 16.0,14.0 14.0)),((30.0 30.0,30.0 35.0,35.0 35.0,30.0 30.0)))', - }, - } - wanted['log']=[ - u'Errors in file file', - u'1 records discarded due to invalid geometry definitions', - u'10 records discarded due to incompatible geometry types', - u'The 
following lines were not loaded into QGIS due to errors:', - u'Invalid WKT at line 8', - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_013_read_dms_xy(self): - description='Reading degrees/minutes/seconds angles' + def test_015_read_dms_xy(self): + # Reading degrees/minutes/seconds angles filename='testdms.csv' params={'yField': 'lat', 'xField': 'lon', 'type': 'csv', 'xyDms': 'yes'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?yField=lat&xField=lon&type=csv&xyDms=yes' - wanted['data']={ - 3L: { - 'id': u'1', - 'description': u'Basic DMS string', - 'lon': u'1 5 30.6', - 'lat': u'35 51 20', - '#fid': 3L, - '#geometry': 'POINT(1.09183333 35.85555556)', - }, - 4L: { - 'id': u'2', - 'description': u'Basic DMS string 2', - 'lon': u'1 05 30.6005', - 'lat': u'035 51 20', - '#fid': 4L, - '#geometry': 'POINT(1.09183347 35.85555556)', - }, - 5L: { - 'id': u'3', - 'description': u'Basic DMS string 3', - 'lon': u'1 05 30.6', - 'lat': u'35 59 9.99', - '#fid': 5L, - '#geometry': 'POINT(1.09183333 35.98610833)', - }, - 7L: { - 'id': u'4', - 'description': u'Prefix sign 1', - 'lon': u'n1 05 30.6', - 'lat': u'e035 51 20', - '#fid': 7L, - '#geometry': 'POINT(1.09183333 35.85555556)', - }, - 8L: { - 'id': u'5', - 'description': u'Prefix sign 2', - 'lon': u'N1 05 30.6', - 'lat': u'E035 51 20', - '#fid': 8L, - '#geometry': 'POINT(1.09183333 35.85555556)', - }, - 9L: { - 'id': u'6', - 'description': u'Prefix sign 3', - 'lon': u'N 1 05 30.6', - 'lat': u'E 035 51 20', - '#fid': 9L, - '#geometry': 'POINT(1.09183333 35.85555556)', - }, - 10L: { - 'id': u'7', - 'description': u'Prefix sign 4', - 'lon': u'S1 05 30.6', - 'lat': u'W035 51 20', - '#fid': 10L, - '#geometry': 'POINT(-1.09183333 -35.85555556)', - }, - 11L: { - 'id': u'8', - 'description': u'Prefix sign 5', - 'lon': 
u'+1 05 30.6', - 'lat': u'+035 51 20', - '#fid': 11L, - '#geometry': 'POINT(1.09183333 35.85555556)', - }, - 12L: { - 'id': u'9', - 'description': u'Prefix sign 6', - 'lon': u'-1 05 30.6', - 'lat': u'-035 51 20', - '#fid': 12L, - '#geometry': 'POINT(-1.09183333 -35.85555556)', - }, - 14L: { - 'id': u'10', - 'description': u'Postfix sign 1', - 'lon': u'1 05 30.6n', - 'lat': u'035 51 20e', - '#fid': 14L, - '#geometry': 'POINT(1.09183333 35.85555556)', - }, - 15L: { - 'id': u'11', - 'description': u'Postfix sign 2', - 'lon': u'1 05 30.6N', - 'lat': u'035 51 20E', - '#fid': 15L, - '#geometry': 'POINT(1.09183333 35.85555556)', - }, - 16L: { - 'id': u'12', - 'description': u'Postfix sign 3', - 'lon': u'1 05 30.6 N', - 'lat': u'035 51 20 E', - '#fid': 16L, - '#geometry': 'POINT(1.09183333 35.85555556)', - }, - 17L: { - 'id': u'13', - 'description': u'Postfix sign 4', - 'lon': u'1 05 30.6S', - 'lat': u'035 51 20W', - '#fid': 17L, - '#geometry': 'POINT(-1.09183333 -35.85555556)', - }, - 18L: { - 'id': u'14', - 'description': u'Postfix sign 5', - 'lon': u'1 05 30.6+', - 'lat': u'035 51 20+', - '#fid': 18L, - '#geometry': 'POINT(1.09183333 35.85555556)', - }, - 19L: { - 'id': u'15', - 'description': u'Postfix sign 6', - 'lon': u'1 05 30.6-', - 'lat': u'035 51 20-', - '#fid': 19L, - '#geometry': 'POINT(-1.09183333 -35.85555556)', - }, - 21L: { - 'id': u'16', - 'description': u'Leading and trailing blanks 1', - 'lon': u' 1 05 30.6', - 'lat': u'035 51 20 ', - '#fid': 21L, - '#geometry': 'POINT(1.09183333 35.85555556)', - }, - 22L: { - 'id': u'17', - 'description': u'Leading and trailing blanks 2', - 'lon': u' N 1 05 30.6', - 'lat': u'035 51 20 E ', - '#fid': 22L, - '#geometry': 'POINT(1.09183333 35.85555556)', - }, - 24L: { - 'id': u'18', - 'description': u'Alternative characters for D,M,S', - 'lon': u'1d05m30.6s S', - 'lat': u"35d51'20", - '#fid': 24L, - '#geometry': 'POINT(-1.09183333 35.85555556)', - }, - 25L: { - 'id': u'19', - 'description': u'Degrees/minutes format', - 
'lon': u'1 05.23', - 'lat': u'4 55.03', - '#fid': 25L, - '#geometry': 'POINT(1.08716667 4.91716667)', - }, - } - wanted['log']=[ - u'Errors in file file', - u'5 records discarded due to invalid geometry definitions', - u'The following lines were not loaded into QGIS due to errors:', - u'Invalid X or Y fields at line 27', - u'Invalid X or Y fields at line 28', - u'Invalid X or Y fields at line 29', - u'Invalid X or Y fields at line 30', - u'Invalid X or Y fields at line 31', - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_014_decimal_point(self): - description='Reading degrees/minutes/seconds angles' + def test_016_decimal_point(self): + # Reading coordinates using a comma as the decimal point filename='testdp.csv' params={'yField': 'geom_y', 'xField': 'geom_x', 'type': 'csv', 'delimiter': ';', 'decimalPoint': ','} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?yField=geom_y&xField=geom_x&type=csv&delimiter=;&decimalPoint=,' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Comma as decimal point 1', - 'geom_x': u'10', - 'geom_y': u'20', - 'other': u'30', - 'text field': u'Field with , in it', - '#fid': 2L, - '#geometry': 'POINT(10.0 20.0)', - }, - 3L: { - 'id': u'2', - 'description': u'Comma as decimal point 2', - 'geom_x': u'12', - 'geom_y': u'25.003', - 'other': u'-38.55', - 'text field': u'Plain text field', - '#fid': 3L, - '#geometry': 'POINT(12.0 25.003)', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) + runTest(filename,requests,**params) - - def test_015_regular_expression_1(self): - description='Parsing regular expression delimiter' + def test_017_regular_expression_1(self): + # Parsing regular expression delimiter filename='testre.txt' params={'geomType': 'none', 'trimFields': 'Y', 'delimiter': 'RE(?:GEXP)?', 
'type': 'regexp'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&trimFields=Y&delimiter=RE(?:GEXP)?&type=regexp' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Basic regular expression test', - 'data': u'data1', - 'info': u'info', - '#fid': 2L, - '#geometry': 'None', - }, - 3L: { - 'id': u'2', - 'description': u'Basic regular expression test 2', - 'data': u'data2', - 'info': u'info2', - '#fid': 3L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_016_regular_expression_2(self): - description='Parsing regular expression delimiter with capture groups' + def test_018_regular_expression_2(self): + # Parsing regular expression delimiter with capture groups filename='testre.txt' params={'geomType': 'none', 'trimFields': 'Y', 'delimiter': '(RE)(GEXP)?', 'type': 'regexp'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&trimFields=Y&delimiter=(RE)(GEXP)?&type=regexp' - wanted['data']={ - 2L: { - 'id': u'1', - 'RE': u'RE', - 'GEXP': u'GEXP', - 'description': u'RE', - 'RE_1': u'RE', - 'GEXP_1': u'GEXP', - 'data': u'data1', - 'RE_2': u'RE', - 'GEXP_2': u'GEXP', - 'info': u'info', - '#fid': 2L, - '#geometry': 'None', - }, - 3L: { - 'id': u'2', - 'RE': u'RE', - 'GEXP': u'GEXP', - 'description': u'RE', - 'RE_1': u'RE', - 'GEXP_1': u'', - 'data': u'data2', - 'RE_2': u'RE', - 'GEXP_2': u'', - 'info': u'info2', - '#fid': 3L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_017_regular_expression_3(self): - 
description='Parsing anchored regular expression' + def test_019_regular_expression_3(self): + # Parsing anchored regular expression filename='testre2.txt' params={'geomType': 'none', 'trimFields': 'Y', 'delimiter': '^(.{5})(.{30})(.{5,})', 'type': 'regexp'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&trimFields=Y&delimiter=^(.{5})(.{30})(.{5,})&type=regexp' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Anchored regexp', - 'information': u'Some data', - '#fid': 2L, - '#geometry': 'None', - }, - 4L: { - 'id': u'3', - 'description': u'Anchored regexp recovered', - 'information': u'Some data', - '#fid': 4L, - '#geometry': 'None', - }, - } - wanted['log']=[ - u'Errors in file file', - u'1 records discarded due to invalid format', - u'The following lines were not loaded into QGIS due to errors:', - u'Invalid record format at line 3', - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_017a_regular_expression_4(self): - description='Parsing zero length re' + def test_020_regular_expression_4(self): + # Parsing zero length re filename='testre3.txt' params={'geomType': 'none', 'delimiter': 'x?', 'type': 'regexp'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&delimiter=x?&type=regexp' - wanted['data']={ - 2L: { - 'id': u'f', - 'description': u'i', - 's': u'f', - 'm': u'i', - 'a': u'.', - 'l': u'.', - 'l_1': u'i', - 'field_6': u'l', - 'field_7': u'e', - '#fid': 2L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) + runTest(filename,requests,**params) - - def test_017a_regular_expression_5(self): - 
description='Parsing zero length re 2' + def test_021_regular_expression_5(self): + # Parsing zero length re 2 filename='testre3.txt' params={'geomType': 'none', 'delimiter': '\\b', 'type': 'regexp'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&delimiter=\\b&type=regexp' - wanted['data']={ - 2L: { - 'id': u'fi', - 'description': u'..', - 'small': u'fi', - 'field_2': u'..', - 'field_3': u'ile', - '#fid': 2L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) + runTest(filename,requests,**params) - - def test_018_utf8_encoded_file(self): - description='UTF8 encoded file test' + def test_022_utf8_encoded_file(self): + # UTF8 encoded file test filename='testutf8.csv' params={'geomType': 'none', 'delimiter': '|', 'type': 'csv', 'encoding': 'utf-8'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&delimiter=|&type=csv&encoding=utf-8' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Correctly read UTF8 encoding', - 'name': u'Field has \u0101cc\xe8nt\xe9d text', - '#fid': 2L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_019_latin1_encoded_file(self): - description='Latin1 encoded file test' + def test_023_latin1_encoded_file(self): + # Latin1 encoded file test filename='testlatin1.csv' params={'geomType': 'none', 'delimiter': '|', 'type': 'csv', 'encoding': 'latin1'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - 
wanted['uri']=u'file://file?geomType=none&delimiter=|&type=csv&encoding=latin1' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Correctly read latin1 encoding', - 'name': u'This test is \xa9', - '#fid': 2L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - - def test_030_filter_rect_xy(self): - description='Filter extents on XY layer' + def test_024_filter_rect_xy(self): + # Filter extents on XY layer filename='testextpt.txt' params={'yField': 'y', 'delimiter': '|', 'type': 'csv', 'xField': 'x'} requests=[ - {'extents': [10, 30, 30, 50]}, - {'extents': [10, 30, 30, 50], 'exact': 1}, + {'extents': [10, 30, 30, 50]}, + {'extents': [10, 30, 30, 50], 'exact': 1}, {'extents': [110, 130, 130, 150]}] - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?yField=y&delimiter=|&type=csv&xField=x' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Inside', - 'x': u'15', - 'y': u'35', - '#fid': 2L, - '#geometry': 'POINT(15.0 35.0)', - }, - 10L: { - 'id': u'9', - 'description': u'Inside 2', - 'x': u'25', - 'y': u'45', - '#fid': 10L, - '#geometry': 'POINT(25.0 45.0)', - }, - 1002L: { - 'id': u'1', - 'description': u'Inside', - 'x': u'15', - 'y': u'35', - '#fid': 2L, - '#geometry': 'POINT(15.0 35.0)', - }, - 1010L: { - 'id': u'9', - 'description': u'Inside 2', - 'x': u'25', - 'y': u'45', - '#fid': 10L, - '#geometry': 'POINT(25.0 45.0)', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_031_filter_rect_wkt(self): - description='Filter extents on WKT layer' + def test_025_filter_rect_wkt(self): + # Filter extents on WKT layer filename='testextw.txt' params={'delimiter': '|', 'type': 'csv', 'wktField': 'wkt'} requests=[ - {'extents': [10, 30, 30, 
50]}, - {'extents': [10, 30, 30, 50], 'exact': 1}, + {'extents': [10, 30, 30, 50]}, + {'extents': [10, 30, 30, 50], 'exact': 1}, {'extents': [110, 130, 130, 150]}] - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?delimiter=|&type=csv&wktField=wkt' - wanted['data']={ - 2L: { - 'id': u'1', - 'description': u'Inside', - '#fid': 2L, - '#geometry': 'LINESTRING(12.0 32.0, 28.0 48.0)', - }, - 4L: { - 'id': u'3', - 'description': u'Crossing', - '#fid': 4L, - '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', - }, - 5L: { - 'id': u'4', - 'description': u'Bounding box overlap', - '#fid': 5L, - '#geometry': 'LINESTRING(5.0 30.0, 5.0 55.0, 30.0 55.0)', - }, - 6L: { - 'id': u'5', - 'description': u'Crossing 2', - '#fid': 6L, - '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', - }, - 7L: { - 'id': u'6', - 'description': u'Bounding box overlap 2', - '#fid': 7L, - '#geometry': 'LINESTRING(28.0 29.0, 31.0 29.0, 31.0 33.0)', - }, - 1002L: { - 'id': u'1', - 'description': u'Inside', - '#fid': 2L, - '#geometry': 'LINESTRING(12.0 32.0, 28.0 48.0)', - }, - 1004L: { - 'id': u'3', - 'description': u'Crossing', - '#fid': 4L, - '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', - }, - 1006L: { - 'id': u'5', - 'description': u'Crossing 2', - '#fid': 6L, - '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) - + runTest(filename,requests,**params) - def test_032_filter_fid(self): - description='Filter on feature id' + def test_026_filter_fid(self): + # Filter on feature id filename='test.csv' params={'geomType': 'none', 'type': 'csv'} requests=[ - {'fid': 3}, - {'fid': 9}, - {'fid': 5}, - {'fid': 7}, + {'fid': 3}, + {'fid': 9}, + {'fid': 20}, {'fid': 3}] - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited 
text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&type=csv' - wanted['data']={ - 3L: { - 'id': u'2', - 'description': u'Quoted field', - 'data': u'Quoted data', - 'info': u'Unquoted', - 'field_5': u'', - '#fid': 3L, - '#geometry': 'None', - }, - 1009L: { - 'id': u'5', - 'description': u'Extra fields', - 'data': u'data', - 'info': u'info', - 'field_5': u'message', - '#fid': 9L, - '#geometry': 'None', - }, - 2005L: { - 'id': u'4', - 'description': u'Quoted newlines', - 'data': u'Line 1\nLine 2\n\nLine 4', - 'info': u'No data', - 'field_5': u'', - '#fid': 5L, - '#geometry': 'None', - }, - 4003L: { - 'id': u'2', - 'description': u'Quoted field', - 'data': u'Quoted data', - 'info': u'Unquoted', - 'field_5': u'', - '#fid': 3L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) - - + runTest(filename,requests,**params) - def test_033_filter_attributes(self): - description='Filter on attributes' + def test_027_filter_attributes(self): + # Filter on attributes filename='test.csv' params={'geomType': 'none', 'type': 'csv'} requests=[ - {'attributes': [1, 3]}, - {'fid': 9}, - {'attributes': [1, 3], 'fid': 9}, - {'attributes': [3, 1], 'fid': 9}, - {'attributes': [1, 3, 7], 'fid': 9}, + {'attributes': [1, 3]}, + {'fid': 9}, + {'attributes': [1, 3], 'fid': 9}, + {'attributes': [3, 1], 'fid': 9}, + {'attributes': [1, 3, 7], 'fid': 9}, {'attributes': [], 'fid': 9}] - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&type=csv' - wanted['data']={ - 2L: { - 'id': u'', - 'description': u'Basic unquoted record', - 'data': u'', - 'info': u'Some info', - 'field_5': u'', - '#fid': 2L, - '#geometry': 'None', - }, - 3L: { - 'id': u'', - 'description': u'Quoted field', - 'data': u'', - 'info': u'Unquoted', - 'field_5': u'', - '#fid': 3L, - '#geometry': 'None', - }, - 4L: 
{ - 'id': u'', - 'description': u'Escaped quotes', - 'data': u'', - 'info': u'Unquoted', - 'field_5': u'', - '#fid': 4L, - '#geometry': 'None', - }, - 5L: { - 'id': u'', - 'description': u'Quoted newlines', - 'data': u'', - 'info': u'No data', - 'field_5': u'', - '#fid': 5L, - '#geometry': 'None', - }, - 9L: { - 'id': u'', - 'description': u'Extra fields', - 'data': u'', - 'info': u'info', - 'field_5': u'', - '#fid': 9L, - '#geometry': 'None', - }, - 10L: { - 'id': u'', - 'description': u'Missing fields', - 'data': u'', - 'info': u'', - 'field_5': u'', - '#fid': 10L, - '#geometry': 'None', - }, - 1009L: { - 'id': u'5', - 'description': u'Extra fields', - 'data': u'data', - 'info': u'info', - 'field_5': u'message', - '#fid': 9L, - '#geometry': 'None', - }, - 2009L: { - 'id': u'', - 'description': u'Extra fields', - 'data': u'', - 'info': u'info', - 'field_5': u'', - '#fid': 9L, - '#geometry': 'None', - }, - 3009L: { - 'id': u'', - 'description': u'Extra fields', - 'data': u'', - 'info': u'info', - 'field_5': u'', - '#fid': 9L, - '#geometry': 'None', - }, - 4009L: { - 'id': u'', - 'description': u'Extra fields', - 'data': u'', - 'info': u'info', - 'field_5': u'', - '#fid': 9L, - '#geometry': 'None', - }, - 5009L: { - 'id': u'', - 'description': u'', - 'data': u'', - 'info': u'', - 'field_5': u'', - '#fid': 9L, - '#geometry': 'None', - }, - } - wanted['log']=[ - ] - runTest(description,wanted,filename,requests,**params) + runTest(filename,requests,**params) - def test_034_substring_test(self): - description='CSV file parsing' + def test_028_substring_test(self): + # Filtering records with a subset string filename='test.csv' params={'geomType': 'none', 'subset': 'id % 2 = 1', 'type': 'csv'} requests=None - if rebuildTests: - createTest(description,filename,requests,**params) - assert False,"Set rebuildTests to False to run delimited text tests" - wanted={} - wanted['uri']=u'file://file?geomType=none&type=csv&subset=id%20%25%202%20%3D%201' - wanted['data']={ - 2L: { - 'id': u'1', - 
'description': u'Basic unquoted record', - 'data': u'Some data', - 'info': u'Some info', - 'field_5': u'', - '#fid': 2L, - '#geometry': 'None', - }, - 4L: { - 'id': u'3', - 'description': u'Escaped quotes', - 'data': u'Quoted "citation" data', - 'info': u'Unquoted', - 'field_5': u'', - '#fid': 4L, - '#geometry': 'None', - }, - 9L: { - 'id': u'5', - 'description': u'Extra fields', - 'data': u'data', - 'info': u'info', - 'field_5': u'message', - '#fid': 9L, - '#geometry': 'None', - }, - } - wanted['log']=[ + runTest(filename,requests,**params) + + def test_029_file_watcher(self): + # Testing file watcher + (filehandle,filename) = tempfile.mkstemp() + with os.fdopen(filehandle,"w") as f: + f.write("id,name\n1,rabbit\n2,pooh\n") + QCoreApplication.instance().processEvents() + def updatefile1( layer ): + with file(filename,'a') as f: + f.write('3,tigger\n') + QCoreApplication.instance().processEvents() + def updatefile2( layer ): + with file(filename,'w') as f: + f.write("name,size,id\ntoad,small,5\nmole,medium,6\nbadger,big,7\n") + QCoreApplication.instance().processEvents() + def deletefile( layer ): + os.remove(filename) + params={'geomType': 'none', 'type': 'csv' } + requests=[ + {'fid': 3}, + {}, + {'fid': 7}, + updatefile1, + {'fid': 3}, + {'fid': 4}, + {}, + {'fid': 7}, + updatefile2, + {'fid': 2}, + {}, + {'fid': 7}, + deletefile, + {'fid': 2}, + {}, ] - runTest(description,wanted,filename,requests,**params) + runTest(filename,requests,**params) + + def test_030_filter_rect_xy_spatial_index(self): + # Filter extents on XY layer with spatial index + filename='testextpt.txt' + params={'yField': 'y', 'delimiter': '|', 'type': 'csv', 'xField': 'x', 'spatialIndex': 'Y' } + requests=[ + {'extents': [10, 30, 30, 50]}, + {'extents': [10, 30, 30, 50], 'exact': 1}, + {'extents': [110, 130, 130, 150]}, + {}, + {'extents': [-1000, -1000, 1000, 1000]} + ] + runTest(filename,requests,**params) + + def test_031_filter_rect_wkt_spatial_index(self): + # Filter extents on WKT 
layer with spatial index + filename='testextw.txt' + params={'delimiter': '|', 'type': 'csv', 'wktField': 'wkt', 'spatialIndex': 'Y' } + requests=[ + {'extents': [10, 30, 30, 50]}, + {'extents': [10, 30, 30, 50], 'exact': 1}, + {'extents': [110, 130, 130, 150]}, + {}, + {'extents': [-1000, -1000, 1000, 1000]} + ] + runTest(filename,requests,**params) + + def test_032_filter_rect_wkt_create_spatial_index(self): + # Filter extents on WKT layer building spatial index + filename='testextw.txt' + params={'delimiter': '|', 'type': 'csv', 'wktField': 'wkt' } + requests=[ + {'extents': [10, 30, 30, 50]}, + {}, + lambda layer: layer.dataProvider().createSpatialIndex(), + {'extents': [10, 30, 30, 50]}, + {'extents': [10, 30, 30, 50], 'exact': 1}, + {'extents': [110, 130, 130, 150]}, + {}, + {'extents': [-1000, -1000, 1000, 1000]} + ] + runTest(filename,requests,**params) + + + def test_033_reset_subset_string(self): + # CSV file parsing + filename='test.csv' + params={'geomType': 'none', 'type': 'csv'} + requests=[ + {}, + lambda layer: layer.dataProvider().setSubsetString("id % 2 = 1",True), + {}, + lambda layer: layer.dataProvider().setSubsetString("id = 6",False), + {}, + lambda layer: layer.dataProvider().setSubsetString("id = 3",False), + {}, + lambda layer: layer.dataProvider().setSubsetString("id % 2 = 1",True), + {}, + lambda layer: layer.dataProvider().setSubsetString("id % 2 = 0",True), + {}, + ] + runTest(filename,requests,**params) -#END if __name__ == '__main__': unittest.main() diff --git a/tests/src/python/test_qgsdelimitedtextprovider_wanted.py b/tests/src/python/test_qgsdelimitedtextprovider_wanted.py new file mode 100644 index 000000000000..db939e3b742d --- /dev/null +++ b/tests/src/python/test_qgsdelimitedtextprovider_wanted.py @@ -0,0 +1,2030 @@ + +def test_002_load_csv_file(): + wanted={} + wanted['uri']=u'file://test.csv?geomType=none&type=csv' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Basic unquoted record', + 'data': u'Some data', + 
'info': u'Some info', + 'field_5': u'', + '#fid': 2L, + '#geometry': 'None', + }, + 3L: { + 'id': u'2', + 'description': u'Quoted field', + 'data': u'Quoted data', + 'info': u'Unquoted', + 'field_5': u'', + '#fid': 3L, + '#geometry': 'None', + }, + 4L: { + 'id': u'3', + 'description': u'Escaped quotes', + 'data': u'Quoted "citation" data', + 'info': u'Unquoted', + 'field_5': u'', + '#fid': 4L, + '#geometry': 'None', + }, + 5L: { + 'id': u'4', + 'description': u'Quoted newlines', + 'data': u'Line 1\nLine 2\n\nLine 4', + 'info': u'No data', + 'field_5': u'', + '#fid': 5L, + '#geometry': 'None', + }, + 9L: { + 'id': u'5', + 'description': u'Extra fields', + 'data': u'data', + 'info': u'info', + 'field_5': u'message', + '#fid': 9L, + '#geometry': 'None', + }, + 10L: { + 'id': u'6', + 'description': u'Missing fields', + 'data': u'', + 'info': u'', + 'field_5': u'', + '#fid': 10L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_003_field_naming(): + wanted={} + wanted['uri']=u'file://testfields.csv?geomType=none&type=csv' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Generation of field names', + 'data': u'Some data', + 'field_4': u'Some info', + 'data_2': u'', + 'field_6': u'', + 'field_7': u'', + 'field_3_1': u'', + 'data_1': u'', + 'field_10': u'', + 'field_11': u'', + 'field_12': u'last data', + '#fid': 2L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_004_max_fields(): + wanted={} + wanted['uri']=u'file://testfields.csv?geomType=none&maxFields=7&type=csv' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Generation of field names', + 'data': u'Some data', + 'field_4': u'Some info', + 'data_1': u'', + 'field_6': u'', + 'field_7': u'', + '#fid': 2L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_005_load_whitespace(): + wanted={} + wanted['uri']=u'file://test.space?geomType=none&type=whitespace' + wanted['data']={ + 2L: { + 'id': u'1', 
+ 'description': u'Simple_whitespace_file', + 'data': u'data1', + 'info': u'info1', + 'field_5': u'', + 'field_6': u'', + '#fid': 2L, + '#geometry': 'None', + }, + 3L: { + 'id': u'2', + 'description': u'Whitespace_at_start_of_line', + 'data': u'data2', + 'info': u'info2', + 'field_5': u'', + 'field_6': u'', + '#fid': 3L, + '#geometry': 'None', + }, + 4L: { + 'id': u'3', + 'description': u'Tab_whitespace', + 'data': u'data3', + 'info': u'info3', + 'field_5': u'', + 'field_6': u'', + '#fid': 4L, + '#geometry': 'None', + }, + 5L: { + 'id': u'4', + 'description': u'Multiple_whitespace_characters', + 'data': u'data4', + 'info': u'info4', + 'field_5': u'', + 'field_6': u'', + '#fid': 5L, + '#geometry': 'None', + }, + 6L: { + 'id': u'5', + 'description': u'Extra_fields', + 'data': u'data5', + 'info': u'info5', + 'field_5': u'message5', + 'field_6': u'rubbish5', + '#fid': 6L, + '#geometry': 'None', + }, + 7L: { + 'id': u'6', + 'description': u'Missing_fields', + 'data': u'', + 'info': u'', + 'field_5': u'', + 'field_6': u'', + '#fid': 7L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_006_quote_escape(): + wanted={} + wanted['uri']=u'file://test.pipe?geomType=none"e="&delimiter=|&escape=\\' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Using pipe delimiter', + 'data': u'data 1', + 'info': u'info 1', + 'field_5': u'', + 'field_6': u'', + '#fid': 2L, + '#geometry': 'None', + }, + 3L: { + 'id': u'2', + 'description': u'Using backslash escape on pipe', + 'data': u'data 2 | piped', + 'info': u'info2', + 'field_5': u'', + 'field_6': u'', + '#fid': 3L, + '#geometry': 'None', + }, + 4L: { + 'id': u'3', + 'description': u'Backslash escaped newline', + 'data': u'data3 \nline2 \nline3', + 'info': u'info3', + 'field_5': u'', + 'field_6': u'', + '#fid': 4L, + '#geometry': 'None', + }, + 7L: { + 'id': u'4', + 'description': u'Empty field', + 'data': u'', + 'info': u'info4', + 'field_5': u'', + 'field_6': u'', + '#fid': 7L, + 
'#geometry': 'None', + }, + 8L: { + 'id': u'5', + 'description': u'Quoted field', + 'data': u'More | piped data', + 'info': u'info5', + 'field_5': u'', + 'field_6': u'', + '#fid': 8L, + '#geometry': 'None', + }, + 9L: { + 'id': u'6', + 'description': u'Escaped quote', + 'data': u'Field "citation" ', + 'info': u'info6', + 'field_5': u'', + 'field_6': u'', + '#fid': 9L, + '#geometry': 'None', + }, + 10L: { + 'id': u'7', + 'description': u'Missing fields', + 'data': u'', + 'info': u'', + 'field_5': u'', + 'field_6': u'', + '#fid': 10L, + '#geometry': 'None', + }, + 11L: { + 'id': u'8', + 'description': u'Extra fields', + 'data': u'data8', + 'info': u'info8', + 'field_5': u'message8', + 'field_6': u'more', + '#fid': 11L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_007_multiple_quote(): + wanted={} + wanted['uri']=u'file://test.quote?geomType=none"e=\'"&type=csv&escape="\'' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Multiple quotes 1', + 'data': u'Quoted,data1', + 'info': u'info1', + '#fid': 2L, + '#geometry': 'None', + }, + 3L: { + 'id': u'2', + 'description': u'Multiple quotes 2', + 'data': u'Quoted,data2', + 'info': u'info2', + '#fid': 3L, + '#geometry': 'None', + }, + 4L: { + 'id': u'3', + 'description': u'Leading and following whitespace', + 'data': u'Quoted, data3', + 'info': u'info3', + '#fid': 4L, + '#geometry': 'None', + }, + 5L: { + 'id': u'4', + 'description': u'Embedded quotes 1', + 'data': u'Quoted \'\'"\'\' data4', + 'info': u'info4', + '#fid': 5L, + '#geometry': 'None', + }, + 6L: { + 'id': u'5', + 'description': u'Embedded quotes 2', + 'data': u'Quoted \'""\' data5', + 'info': u'info5', + '#fid': 6L, + '#geometry': 'None', + }, + 10L: { + 'id': u'9', + 'description': u'Final record', + 'data': u'date9', + 'info': u'info9', + '#fid': 10L, + '#geometry': 'None', + }, + } + wanted['log']=[ + u'Errors in file file', + u'3 records discarded due to invalid format', + u'The following lines were not loaded 
into QGIS due to errors:', + u'Invalid record format at line 7', + u'Invalid record format at line 8', + u'Invalid record format at line 9', + ] + return wanted + + +def test_008_badly_formed_quotes(): + wanted={} + wanted['uri']=u'file://test.badquote?geomType=none"e="&type=csv&escape="' + wanted['data']={ + 4L: { + 'id': u'3', + 'description': u'Recovered after unclosed quore', + 'data': u'Data ok', + 'info': u'inf3', + '#fid': 4L, + '#geometry': 'None', + }, + } + wanted['log']=[ + u'Errors in file file', + u'2 records discarded due to invalid format', + u'The following lines were not loaded into QGIS due to errors:', + u'Invalid record format at line 2', + u'Invalid record format at line 5', + ] + return wanted + + +def test_009_skip_lines(): + wanted={} + wanted['uri']=u'file://test2.csv?geomType=none&skipLines=2&type=csv&useHeader=no' + wanted['data']={ + 3L: { + 'id': u'3', + 'description': u'Less data', + 'field_1': u'3', + 'field_2': u'Less data', + 'field_3': u'data3', + '#fid': 3L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_010_read_coordinates(): + wanted={} + wanted['uri']=u'file://testpt.csv?yField=geom_y&xField=geom_x&type=csv' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Basic point', + 'geom_x': u'10', + 'geom_y': u'20', + '#fid': 2L, + '#geometry': 'POINT(10.0 20.0)', + }, + 3L: { + 'id': u'2', + 'description': u'Integer point', + 'geom_x': u'11', + 'geom_y': u'22', + '#fid': 3L, + '#geometry': 'POINT(11.0 22.0)', + }, + 5L: { + 'id': u'4', + 'description': u'Final point', + 'geom_x': u'13', + 'geom_y': u'23', + '#fid': 5L, + '#geometry': 'POINT(13.0 23.0)', + }, + } + wanted['log']=[ + u'Errors in file file', + u'1 records discarded due to invalid geometry definitions', + u'The following lines were not loaded into QGIS due to errors:', + u'Invalid X or Y fields at line 4', + ] + return wanted + + +def test_011_read_wkt(): + wanted={} + 
wanted['uri']=u'file://testwkt.csv?delimiter=|&type=csv&wktField=geom_wkt' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Point wkt', + '#fid': 2L, + '#geometry': 'POINT(10.0 20.0)', + }, + 3L: { + 'id': u'2', + 'description': u'Multipoint wkt', + '#fid': 3L, + '#geometry': 'MULTIPOINT(10.0 20.0, 11.0 21.0)', + }, + 9L: { + 'id': u'8', + 'description': u'EWKT prefix', + '#fid': 9L, + '#geometry': 'POINT(10.0 10.0)', + }, + 10L: { + 'id': u'9', + 'description': u'Informix prefix', + '#fid': 10L, + '#geometry': 'POINT(10.0 10.0)', + }, + 11L: { + 'id': u'10', + 'description': u'Measure in point', + '#fid': 11L, + '#geometry': 'POINT(10.0 20.0)', + }, + } + wanted['log']=[ + u'Errors in file file', + u'1 records discarded due to invalid geometry definitions', + u'7 records discarded due to incompatible geometry types', + u'The following lines were not loaded into QGIS due to errors:', + u'Invalid WKT at line 8', + ] + return wanted + + +def test_012_read_wkt_point(): + wanted={} + wanted['uri']=u'file://testwkt.csv?geomType=point&delimiter=|&type=csv&wktField=geom_wkt' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Point wkt', + '#fid': 2L, + '#geometry': 'POINT(10.0 20.0)', + }, + 3L: { + 'id': u'2', + 'description': u'Multipoint wkt', + '#fid': 3L, + '#geometry': 'MULTIPOINT(10.0 20.0, 11.0 21.0)', + }, + 9L: { + 'id': u'8', + 'description': u'EWKT prefix', + '#fid': 9L, + '#geometry': 'POINT(10.0 10.0)', + }, + 10L: { + 'id': u'9', + 'description': u'Informix prefix', + '#fid': 10L, + '#geometry': 'POINT(10.0 10.0)', + }, + 11L: { + 'id': u'10', + 'description': u'Measure in point', + '#fid': 11L, + '#geometry': 'POINT(10.0 20.0)', + }, + } + wanted['log']=[ + u'Errors in file file', + u'1 records discarded due to invalid geometry definitions', + u'7 records discarded due to incompatible geometry types', + u'The following lines were not loaded into QGIS due to errors:', + u'Invalid WKT at line 8', + ] + return wanted + + +def 
test_013_read_wkt_line(): + wanted={} + wanted['uri']=u'file://testwkt.csv?geomType=line&delimiter=|&type=csv&wktField=geom_wkt' + wanted['data']={ + 4L: { + 'id': u'3', + 'description': u'Linestring wkt', + '#fid': 4L, + '#geometry': 'LINESTRING(10.0 20.0, 11.0 21.0)', + }, + 5L: { + 'id': u'4', + 'description': u'Multiline string wkt', + '#fid': 5L, + '#geometry': 'MULTILINESTRING((10.0 20.0, 11.0 21.0), (20.0 30.0, 21.0 31.0))', + }, + 12L: { + 'id': u'11', + 'description': u'Measure in line', + '#fid': 12L, + '#geometry': 'LINESTRING(10.0 20.0, 11.0 21.0)', + }, + 13L: { + 'id': u'12', + 'description': u'Z in line', + '#fid': 13L, + '#geometry': 'LINESTRING(10.0 20.0, 11.0 21.0)', + }, + 14L: { + 'id': u'13', + 'description': u'Measure and Z in line', + '#fid': 14L, + '#geometry': 'LINESTRING(10.0 20.0, 11.0 21.0)', + }, + } + wanted['log']=[ + u'Errors in file file', + u'1 records discarded due to invalid geometry definitions', + u'7 records discarded due to incompatible geometry types', + u'The following lines were not loaded into QGIS due to errors:', + u'Invalid WKT at line 8', + ] + return wanted + + +def test_014_read_wkt_polygon(): + wanted={} + wanted['uri']=u'file://testwkt.csv?geomType=polygon&delimiter=|&type=csv&wktField=geom_wkt' + wanted['data']={ + 6L: { + 'id': u'5', + 'description': u'Polygon wkt', + '#fid': 6L, + '#geometry': 'POLYGON((10.0 10.0,10.0 20.0,20.0 20.0,20.0 10.0,10.0 10.0),(14.0 14.0,14.0 16.0,16.0 16.0,14.0 14.0))', + }, + 7L: { + 'id': u'6', + 'description': u'MultiPolygon wkt', + '#fid': 7L, + '#geometry': 'MULTIPOLYGON(((10.0 10.0,10.0 20.0,20.0 20.0,20.0 10.0,10.0 10.0),(14.0 14.0,14.0 16.0,16.0 16.0,14.0 14.0)),((30.0 30.0,30.0 35.0,35.0 35.0,30.0 30.0)))', + }, + } + wanted['log']=[ + u'Errors in file file', + u'1 records discarded due to invalid geometry definitions', + u'10 records discarded due to incompatible geometry types', + u'The following lines were not loaded into QGIS due to errors:', + u'Invalid WKT at line 8', 
+ ] + return wanted + + +def test_015_read_dms_xy(): + wanted={} + wanted['uri']=u'file://testdms.csv?yField=lat&xField=lon&type=csv&xyDms=yes' + wanted['data']={ + 3L: { + 'id': u'1', + 'description': u'Basic DMS string', + 'lon': u'1 5 30.6', + 'lat': u'35 51 20', + '#fid': 3L, + '#geometry': 'POINT(1.09183333 35.85555556)', + }, + 4L: { + 'id': u'2', + 'description': u'Basic DMS string 2', + 'lon': u'1 05 30.6005', + 'lat': u'035 51 20', + '#fid': 4L, + '#geometry': 'POINT(1.09183347 35.85555556)', + }, + 5L: { + 'id': u'3', + 'description': u'Basic DMS string 3', + 'lon': u'1 05 30.6', + 'lat': u'35 59 9.99', + '#fid': 5L, + '#geometry': 'POINT(1.09183333 35.98610833)', + }, + 7L: { + 'id': u'4', + 'description': u'Prefix sign 1', + 'lon': u'n1 05 30.6', + 'lat': u'e035 51 20', + '#fid': 7L, + '#geometry': 'POINT(1.09183333 35.85555556)', + }, + 8L: { + 'id': u'5', + 'description': u'Prefix sign 2', + 'lon': u'N1 05 30.6', + 'lat': u'E035 51 20', + '#fid': 8L, + '#geometry': 'POINT(1.09183333 35.85555556)', + }, + 9L: { + 'id': u'6', + 'description': u'Prefix sign 3', + 'lon': u'N 1 05 30.6', + 'lat': u'E 035 51 20', + '#fid': 9L, + '#geometry': 'POINT(1.09183333 35.85555556)', + }, + 10L: { + 'id': u'7', + 'description': u'Prefix sign 4', + 'lon': u'S1 05 30.6', + 'lat': u'W035 51 20', + '#fid': 10L, + '#geometry': 'POINT(-1.09183333 -35.85555556)', + }, + 11L: { + 'id': u'8', + 'description': u'Prefix sign 5', + 'lon': u'+1 05 30.6', + 'lat': u'+035 51 20', + '#fid': 11L, + '#geometry': 'POINT(1.09183333 35.85555556)', + }, + 12L: { + 'id': u'9', + 'description': u'Prefix sign 6', + 'lon': u'-1 05 30.6', + 'lat': u'-035 51 20', + '#fid': 12L, + '#geometry': 'POINT(-1.09183333 -35.85555556)', + }, + 14L: { + 'id': u'10', + 'description': u'Postfix sign 1', + 'lon': u'1 05 30.6n', + 'lat': u'035 51 20e', + '#fid': 14L, + '#geometry': 'POINT(1.09183333 35.85555556)', + }, + 15L: { + 'id': u'11', + 'description': u'Postfix sign 2', + 'lon': u'1 05 30.6N', + 
'lat': u'035 51 20E', + '#fid': 15L, + '#geometry': 'POINT(1.09183333 35.85555556)', + }, + 16L: { + 'id': u'12', + 'description': u'Postfix sign 3', + 'lon': u'1 05 30.6 N', + 'lat': u'035 51 20 E', + '#fid': 16L, + '#geometry': 'POINT(1.09183333 35.85555556)', + }, + 17L: { + 'id': u'13', + 'description': u'Postfix sign 4', + 'lon': u'1 05 30.6S', + 'lat': u'035 51 20W', + '#fid': 17L, + '#geometry': 'POINT(-1.09183333 -35.85555556)', + }, + 18L: { + 'id': u'14', + 'description': u'Postfix sign 5', + 'lon': u'1 05 30.6+', + 'lat': u'035 51 20+', + '#fid': 18L, + '#geometry': 'POINT(1.09183333 35.85555556)', + }, + 19L: { + 'id': u'15', + 'description': u'Postfix sign 6', + 'lon': u'1 05 30.6-', + 'lat': u'035 51 20-', + '#fid': 19L, + '#geometry': 'POINT(-1.09183333 -35.85555556)', + }, + 21L: { + 'id': u'16', + 'description': u'Leading and trailing blanks 1', + 'lon': u' 1 05 30.6', + 'lat': u'035 51 20 ', + '#fid': 21L, + '#geometry': 'POINT(1.09183333 35.85555556)', + }, + 22L: { + 'id': u'17', + 'description': u'Leading and trailing blanks 2', + 'lon': u' N 1 05 30.6', + 'lat': u'035 51 20 E ', + '#fid': 22L, + '#geometry': 'POINT(1.09183333 35.85555556)', + }, + 24L: { + 'id': u'18', + 'description': u'Alternative characters for D,M,S', + 'lon': u'1d05m30.6s S', + 'lat': u"35d51'20", + '#fid': 24L, + '#geometry': 'POINT(-1.09183333 35.85555556)', + }, + 25L: { + 'id': u'19', + 'description': u'Degrees/minutes format', + 'lon': u'1 05.23', + 'lat': u'4 55.03', + '#fid': 25L, + '#geometry': 'POINT(1.08716667 4.91716667)', + }, + } + wanted['log']=[ + u'Errors in file file', + u'5 records discarded due to invalid geometry definitions', + u'The following lines were not loaded into QGIS due to errors:', + u'Invalid X or Y fields at line 27', + u'Invalid X or Y fields at line 28', + u'Invalid X or Y fields at line 29', + u'Invalid X or Y fields at line 30', + u'Invalid X or Y fields at line 31', + ] + return wanted + + +def test_016_decimal_point(): + wanted={} + 
wanted['uri']=u'file://testdp.csv?yField=geom_y&xField=geom_x&type=csv&delimiter=;&decimalPoint=,' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Comma as decimal point 1', + 'geom_x': u'10', + 'geom_y': u'20', + 'other': u'30', + 'text field': u'Field with , in it', + '#fid': 2L, + '#geometry': 'POINT(10.0 20.0)', + }, + 3L: { + 'id': u'2', + 'description': u'Comma as decimal point 2', + 'geom_x': u'12', + 'geom_y': u'25.003', + 'other': u'-38.55', + 'text field': u'Plain text field', + '#fid': 3L, + '#geometry': 'POINT(12.0 25.003)', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_017_regular_expression_1(): + wanted={} + wanted['uri']=u'file://testre.txt?geomType=none&trimFields=Y&delimiter=RE(?:GEXP)?&type=regexp' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Basic regular expression test', + 'data': u'data1', + 'info': u'info', + '#fid': 2L, + '#geometry': 'None', + }, + 3L: { + 'id': u'2', + 'description': u'Basic regular expression test 2', + 'data': u'data2', + 'info': u'info2', + '#fid': 3L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_018_regular_expression_2(): + wanted={} + wanted['uri']=u'file://testre.txt?geomType=none&trimFields=Y&delimiter=(RE)(GEXP)?&type=regexp' + wanted['data']={ + 2L: { + 'id': u'1', + 'RE': u'RE', + 'GEXP': u'GEXP', + 'description': u'RE', + 'RE_1': u'RE', + 'GEXP_1': u'GEXP', + 'data': u'data1', + 'RE_2': u'RE', + 'GEXP_2': u'GEXP', + 'info': u'info', + '#fid': 2L, + '#geometry': 'None', + }, + 3L: { + 'id': u'2', + 'RE': u'RE', + 'GEXP': u'GEXP', + 'description': u'RE', + 'RE_1': u'RE', + 'GEXP_1': u'', + 'data': u'data2', + 'RE_2': u'RE', + 'GEXP_2': u'', + 'info': u'info2', + '#fid': 3L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_019_regular_expression_3(): + wanted={} + wanted['uri']=u'file://testre2.txt?geomType=none&trimFields=Y&delimiter=^(.{5})(.{30})(.{5,})&type=regexp' + wanted['data']={ + 2L: { + 
'id': u'1', + 'description': u'Anchored regexp', + 'information': u'Some data', + '#fid': 2L, + '#geometry': 'None', + }, + 4L: { + 'id': u'3', + 'description': u'Anchored regexp recovered', + 'information': u'Some data', + '#fid': 4L, + '#geometry': 'None', + }, + } + wanted['log']=[ + u'Errors in file file', + u'1 records discarded due to invalid format', + u'The following lines were not loaded into QGIS due to errors:', + u'Invalid record format at line 3', + ] + return wanted + + +def test_020_regular_expression_4(): + wanted={} + wanted['uri']=u'file://testre3.txt?geomType=none&delimiter=x?&type=regexp' + wanted['data']={ + 2L: { + 'id': u'f', + 'description': u'i', + 's': u'f', + 'm': u'i', + 'a': u'.', + 'l': u'.', + 'l_1': u'i', + 'field_6': u'l', + 'field_7': u'e', + '#fid': 2L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_021_regular_expression_5(): + wanted={} + wanted['uri']=u'file://testre3.txt?geomType=none&delimiter=\\b&type=regexp' + wanted['data']={ + 2L: { + 'id': u'fi', + 'description': u'..', + 'small': u'fi', + 'field_2': u'..', + 'field_3': u'ile', + '#fid': 2L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_022_utf8_encoded_file(): + wanted={} + wanted['uri']=u'file://testutf8.csv?geomType=none&delimiter=|&type=csv&encoding=utf-8' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Correctly read UTF8 encoding', + 'name': u'Field has \u0101cc\xe8nt\xe9d text', + '#fid': 2L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_023_latin1_encoded_file(): + wanted={} + wanted['uri']=u'file://testlatin1.csv?geomType=none&delimiter=|&type=csv&encoding=latin1' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Correctly read latin1 encoding', + 'name': u'This test is \xa9', + '#fid': 2L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_024_filter_rect_xy(): + wanted={} + 
wanted['uri']=u'file://testextpt.txt?yField=y&delimiter=|&type=csv&xField=x' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Inside', + 'x': u'15', + 'y': u'35', + '#fid': 2L, + '#geometry': 'POINT(15.0 35.0)', + }, + 10L: { + 'id': u'9', + 'description': u'Inside 2', + 'x': u'25', + 'y': u'45', + '#fid': 10L, + '#geometry': 'POINT(25.0 45.0)', + }, + 1002L: { + 'id': u'1', + 'description': u'Inside', + 'x': u'15', + 'y': u'35', + '#fid': 2L, + '#geometry': 'POINT(15.0 35.0)', + }, + 1010L: { + 'id': u'9', + 'description': u'Inside 2', + 'x': u'25', + 'y': u'45', + '#fid': 10L, + '#geometry': 'POINT(25.0 45.0)', + }, + } + wanted['log']=[ + 'Request 2 did not return any data', + ] + return wanted + + +def test_025_filter_rect_wkt(): + wanted={} + wanted['uri']=u'file://testextw.txt?delimiter=|&type=csv&wktField=wkt' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Inside', + '#fid': 2L, + '#geometry': 'LINESTRING(12.0 32.0, 28.0 48.0)', + }, + 4L: { + 'id': u'3', + 'description': u'Crossing', + '#fid': 4L, + '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', + }, + 5L: { + 'id': u'4', + 'description': u'Bounding box overlap', + '#fid': 5L, + '#geometry': 'LINESTRING(5.0 30.0, 5.0 55.0, 30.0 55.0)', + }, + 6L: { + 'id': u'5', + 'description': u'Crossing 2', + '#fid': 6L, + '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', + }, + 7L: { + 'id': u'6', + 'description': u'Bounding box overlap 2', + '#fid': 7L, + '#geometry': 'LINESTRING(28.0 29.0, 31.0 29.0, 31.0 33.0)', + }, + 1002L: { + 'id': u'1', + 'description': u'Inside', + '#fid': 2L, + '#geometry': 'LINESTRING(12.0 32.0, 28.0 48.0)', + }, + 1004L: { + 'id': u'3', + 'description': u'Crossing', + '#fid': 4L, + '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', + }, + 1006L: { + 'id': u'5', + 'description': u'Crossing 2', + '#fid': 6L, + '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', + }, + } + wanted['log']=[ + 'Request 2 did not return any data', + ] + return wanted + + +def 
test_026_filter_fid(): + wanted={} + wanted['uri']=u'file://test.csv?geomType=none&type=csv' + wanted['data']={ + 3L: { + 'id': u'2', + 'description': u'Quoted field', + 'data': u'Quoted data', + 'info': u'Unquoted', + 'field_5': u'', + '#fid': 3L, + '#geometry': 'None', + }, + 1009L: { + 'id': u'5', + 'description': u'Extra fields', + 'data': u'data', + 'info': u'info', + 'field_5': u'message', + '#fid': 9L, + '#geometry': 'None', + }, + 3003L: { + 'id': u'2', + 'description': u'Quoted field', + 'data': u'Quoted data', + 'info': u'Unquoted', + 'field_5': u'', + '#fid': 3L, + '#geometry': 'None', + }, + } + wanted['log']=[ + 'Request 2 did not return any data', + ] + return wanted + + +def test_027_filter_attributes(): + wanted={} + wanted['uri']=u'file://test.csv?geomType=none&type=csv' + wanted['data']={ + 2L: { + 'id': u'', + 'description': u'Basic unquoted record', + 'data': u'', + 'info': u'Some info', + 'field_5': u'', + '#fid': 2L, + '#geometry': 'None', + }, + 3L: { + 'id': u'', + 'description': u'Quoted field', + 'data': u'', + 'info': u'Unquoted', + 'field_5': u'', + '#fid': 3L, + '#geometry': 'None', + }, + 4L: { + 'id': u'', + 'description': u'Escaped quotes', + 'data': u'', + 'info': u'Unquoted', + 'field_5': u'', + '#fid': 4L, + '#geometry': 'None', + }, + 5L: { + 'id': u'', + 'description': u'Quoted newlines', + 'data': u'', + 'info': u'No data', + 'field_5': u'', + '#fid': 5L, + '#geometry': 'None', + }, + 9L: { + 'id': u'', + 'description': u'Extra fields', + 'data': u'', + 'info': u'info', + 'field_5': u'', + '#fid': 9L, + '#geometry': 'None', + }, + 10L: { + 'id': u'', + 'description': u'Missing fields', + 'data': u'', + 'info': u'', + 'field_5': u'', + '#fid': 10L, + '#geometry': 'None', + }, + 1009L: { + 'id': u'5', + 'description': u'Extra fields', + 'data': u'data', + 'info': u'info', + 'field_5': u'message', + '#fid': 9L, + '#geometry': 'None', + }, + 2009L: { + 'id': u'', + 'description': u'Extra fields', + 'data': u'', + 'info': u'info', + 
'field_5': u'', + '#fid': 9L, + '#geometry': 'None', + }, + 3009L: { + 'id': u'', + 'description': u'Extra fields', + 'data': u'', + 'info': u'info', + 'field_5': u'', + '#fid': 9L, + '#geometry': 'None', + }, + 4009L: { + 'id': u'', + 'description': u'Extra fields', + 'data': u'', + 'info': u'info', + 'field_5': u'', + '#fid': 9L, + '#geometry': 'None', + }, + 5009L: { + 'id': u'', + 'description': u'', + 'data': u'', + 'info': u'', + 'field_5': u'', + '#fid': 9L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_028_substring_test(): + wanted={} + wanted['uri']=u'file://test.csv?geomType=none&type=csv&subset=id%20%25%202%20%3D%201' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Basic unquoted record', + 'data': u'Some data', + 'info': u'Some info', + 'field_5': u'', + '#fid': 2L, + '#geometry': 'None', + }, + 4L: { + 'id': u'3', + 'description': u'Escaped quotes', + 'data': u'Quoted "citation" data', + 'info': u'Unquoted', + 'field_5': u'', + '#fid': 4L, + '#geometry': 'None', + }, + 9L: { + 'id': u'5', + 'description': u'Extra fields', + 'data': u'data', + 'info': u'info', + 'field_5': u'message', + '#fid': 9L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted + + +def test_029_file_watcher(): + wanted={} + wanted['uri']=u'file://file?geomType=none&type=csv' + wanted['data']={ + 3L: { + 'id': u'2', + 'description': u'pooh', + 'name': u'pooh', + '#fid': 3L, + '#geometry': 'None', + }, + 1002L: { + 'id': u'1', + 'description': u'rabbit', + 'name': u'rabbit', + '#fid': 2L, + '#geometry': 'None', + }, + 1003L: { + 'id': u'2', + 'description': u'pooh', + 'name': u'pooh', + '#fid': 3L, + '#geometry': 'None', + }, + 4003L: { + 'id': u'2', + 'description': u'pooh', + 'name': u'pooh', + '#fid': 3L, + '#geometry': 'None', + }, + 5004L: { + 'id': u'3', + 'description': u'tigger', + 'name': u'tigger', + '#fid': 4L, + '#geometry': 'None', + }, + 6002L: { + 'id': u'1', + 'description': u'rabbit', + 'name': 
u'rabbit', + '#fid': 2L, + '#geometry': 'None', + }, + 6003L: { + 'id': u'2', + 'description': u'pooh', + 'name': u'pooh', + '#fid': 3L, + '#geometry': 'None', + }, + 6004L: { + 'id': u'3', + 'description': u'tigger', + 'name': u'tigger', + '#fid': 4L, + '#geometry': 'None', + }, + 9002L: { + 'id': u'5', + 'description': u'toad', + 'name': u'toad', + '#fid': 2L, + '#geometry': 'None', + }, + 10002L: { + 'id': u'5', + 'description': u'toad', + 'name': u'toad', + '#fid': 2L, + '#geometry': 'None', + }, + 10003L: { + 'id': u'6', + 'description': u'mole', + 'name': u'mole', + '#fid': 3L, + '#geometry': 'None', + }, + 10004L: { + 'id': u'7', + 'description': u'badger', + 'name': u'badger', + '#fid': 4L, + '#geometry': 'None', + }, + 13002L: { + 'id': u'5', + 'description': u'toad', + 'name': u'toad', + '#fid': 2L, + '#geometry': 'None', + }, + 14003L: { + 'id': u'6', + 'description': u'mole', + 'name': u'mole', + '#fid': 3L, + '#geometry': 'None', + }, + 14004L: { + 'id': u'7', + 'description': u'badger', + 'name': u'badger', + '#fid': 4L, + '#geometry': 'None', + }, + } + wanted['log']=[ + 'Request 2 did not return any data', + 'Request 7 did not return any data', + 'Request 11 did not return any data', + u'Errors in file file', + u'The file has been updated by another application - reloading', + u'Errors in file file', + u'The file has been updated by another application - reloading', + ] + return wanted + + +def test_030_filter_rect_xy_spatial_index(): + wanted={} + wanted['uri']=u'file://testextpt.txt?spatialIndex=Y&yField=y&delimiter=|&type=csv&xField=x' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Inside', + 'x': u'15', + 'y': u'35', + '#fid': 2L, + '#geometry': 'POINT(15.0 35.0)', + }, + 10L: { + 'id': u'9', + 'description': u'Inside 2', + 'x': u'25', + 'y': u'45', + '#fid': 10L, + '#geometry': 'POINT(25.0 45.0)', + }, + 1002L: { + 'id': u'1', + 'description': u'Inside', + 'x': u'15', + 'y': u'35', + '#fid': 2L, + '#geometry': 'POINT(15.0 35.0)', + 
}, + 1010L: { + 'id': u'9', + 'description': u'Inside 2', + 'x': u'25', + 'y': u'45', + '#fid': 10L, + '#geometry': 'POINT(25.0 45.0)', + }, + 3002L: { + 'id': u'1', + 'description': u'Inside', + 'x': u'15', + 'y': u'35', + '#fid': 2L, + '#geometry': 'POINT(15.0 35.0)', + }, + 3003L: { + 'id': u'2', + 'description': u'Outside 1', + 'x': u'5', + 'y': u'35', + '#fid': 3L, + '#geometry': 'POINT(5.0 35.0)', + }, + 3004L: { + 'id': u'3', + 'description': u'Outside 2', + 'x': u'5', + 'y': u'55', + '#fid': 4L, + '#geometry': 'POINT(5.0 55.0)', + }, + 3005L: { + 'id': u'4', + 'description': u'Outside 3', + 'x': u'15', + 'y': u'55', + '#fid': 5L, + '#geometry': 'POINT(15.0 55.0)', + }, + 3006L: { + 'id': u'5', + 'description': u'Outside 4', + 'x': u'35', + 'y': u'55', + '#fid': 6L, + '#geometry': 'POINT(35.0 55.0)', + }, + 3007L: { + 'id': u'6', + 'description': u'Outside 5', + 'x': u'35', + 'y': u'45', + '#fid': 7L, + '#geometry': 'POINT(35.0 45.0)', + }, + 3008L: { + 'id': u'7', + 'description': u'Outside 7', + 'x': u'35', + 'y': u'25', + '#fid': 8L, + '#geometry': 'POINT(35.0 25.0)', + }, + 3009L: { + 'id': u'8', + 'description': u'Outside 8', + 'x': u'15', + 'y': u'25', + '#fid': 9L, + '#geometry': 'POINT(15.0 25.0)', + }, + 3010L: { + 'id': u'9', + 'description': u'Inside 2', + 'x': u'25', + 'y': u'45', + '#fid': 10L, + '#geometry': 'POINT(25.0 45.0)', + }, + 4002L: { + 'id': u'1', + 'description': u'Inside', + 'x': u'15', + 'y': u'35', + '#fid': 2L, + '#geometry': 'POINT(15.0 35.0)', + }, + 4003L: { + 'id': u'2', + 'description': u'Outside 1', + 'x': u'5', + 'y': u'35', + '#fid': 3L, + '#geometry': 'POINT(5.0 35.0)', + }, + 4004L: { + 'id': u'3', + 'description': u'Outside 2', + 'x': u'5', + 'y': u'55', + '#fid': 4L, + '#geometry': 'POINT(5.0 55.0)', + }, + 4005L: { + 'id': u'4', + 'description': u'Outside 3', + 'x': u'15', + 'y': u'55', + '#fid': 5L, + '#geometry': 'POINT(15.0 55.0)', + }, + 4006L: { + 'id': u'5', + 'description': u'Outside 4', + 'x': u'35', + 'y': 
u'55', + '#fid': 6L, + '#geometry': 'POINT(35.0 55.0)', + }, + 4007L: { + 'id': u'6', + 'description': u'Outside 5', + 'x': u'35', + 'y': u'45', + '#fid': 7L, + '#geometry': 'POINT(35.0 45.0)', + }, + 4008L: { + 'id': u'7', + 'description': u'Outside 7', + 'x': u'35', + 'y': u'25', + '#fid': 8L, + '#geometry': 'POINT(35.0 25.0)', + }, + 4009L: { + 'id': u'8', + 'description': u'Outside 8', + 'x': u'15', + 'y': u'25', + '#fid': 9L, + '#geometry': 'POINT(15.0 25.0)', + }, + 4010L: { + 'id': u'9', + 'description': u'Inside 2', + 'x': u'25', + 'y': u'45', + '#fid': 10L, + '#geometry': 'POINT(25.0 45.0)', + }, + } + wanted['log']=[ + 'Request 2 did not return any data', + ] + return wanted + + +def test_031_filter_rect_wkt_spatial_index(): + wanted={} + wanted['uri']=u'file://testextw.txt?spatialIndex=Y&delimiter=|&type=csv&wktField=wkt' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Inside', + '#fid': 2L, + '#geometry': 'LINESTRING(12.0 32.0, 28.0 48.0)', + }, + 4L: { + 'id': u'3', + 'description': u'Crossing', + '#fid': 4L, + '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', + }, + 5L: { + 'id': u'4', + 'description': u'Bounding box overlap', + '#fid': 5L, + '#geometry': 'LINESTRING(5.0 30.0, 5.0 55.0, 30.0 55.0)', + }, + 6L: { + 'id': u'5', + 'description': u'Crossing 2', + '#fid': 6L, + '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', + }, + 7L: { + 'id': u'6', + 'description': u'Bounding box overlap 2', + '#fid': 7L, + '#geometry': 'LINESTRING(28.0 29.0, 31.0 29.0, 31.0 33.0)', + }, + 1002L: { + 'id': u'1', + 'description': u'Inside', + '#fid': 2L, + '#geometry': 'LINESTRING(12.0 32.0, 28.0 48.0)', + }, + 1004L: { + 'id': u'3', + 'description': u'Crossing', + '#fid': 4L, + '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', + }, + 1006L: { + 'id': u'5', + 'description': u'Crossing 2', + '#fid': 6L, + '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', + }, + 3002L: { + 'id': u'1', + 'description': u'Inside', + '#fid': 2L, + '#geometry': 'LINESTRING(12.0 32.0, 
28.0 48.0)', + }, + 3003L: { + 'id': u'2', + 'description': u'Outside', + '#fid': 3L, + '#geometry': 'LINESTRING(0.0 0.0, 0.0 10.0)', + }, + 3004L: { + 'id': u'3', + 'description': u'Crossing', + '#fid': 4L, + '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', + }, + 3005L: { + 'id': u'4', + 'description': u'Bounding box overlap', + '#fid': 5L, + '#geometry': 'LINESTRING(5.0 30.0, 5.0 55.0, 30.0 55.0)', + }, + 3006L: { + 'id': u'5', + 'description': u'Crossing 2', + '#fid': 6L, + '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', + }, + 3007L: { + 'id': u'6', + 'description': u'Bounding box overlap 2', + '#fid': 7L, + '#geometry': 'LINESTRING(28.0 29.0, 31.0 29.0, 31.0 33.0)', + }, + 4002L: { + 'id': u'1', + 'description': u'Inside', + '#fid': 2L, + '#geometry': 'LINESTRING(12.0 32.0, 28.0 48.0)', + }, + 4003L: { + 'id': u'2', + 'description': u'Outside', + '#fid': 3L, + '#geometry': 'LINESTRING(0.0 0.0, 0.0 10.0)', + }, + 4004L: { + 'id': u'3', + 'description': u'Crossing', + '#fid': 4L, + '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', + }, + 4005L: { + 'id': u'4', + 'description': u'Bounding box overlap', + '#fid': 5L, + '#geometry': 'LINESTRING(5.0 30.0, 5.0 55.0, 30.0 55.0)', + }, + 4006L: { + 'id': u'5', + 'description': u'Crossing 2', + '#fid': 6L, + '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', + }, + 4007L: { + 'id': u'6', + 'description': u'Bounding box overlap 2', + '#fid': 7L, + '#geometry': 'LINESTRING(28.0 29.0, 31.0 29.0, 31.0 33.0)', + }, + } + wanted['log']=[ + 'Request 2 did not return any data', + ] + return wanted + + +def test_032_filter_rect_wkt_create_spatial_index(): + wanted={} + wanted['uri']=u'file://testextw.txt?delimiter=|&type=csv&wktField=wkt' + wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Inside', + '#fid': 2L, + '#geometry': 'LINESTRING(12.0 32.0, 28.0 48.0)', + }, + 4L: { + 'id': u'3', + 'description': u'Crossing', + '#fid': 4L, + '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', + }, + 5L: { + 'id': u'4', + 'description': 
u'Bounding box overlap', + '#fid': 5L, + '#geometry': 'LINESTRING(5.0 30.0, 5.0 55.0, 30.0 55.0)', + }, + 6L: { + 'id': u'5', + 'description': u'Crossing 2', + '#fid': 6L, + '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', + }, + 7L: { + 'id': u'6', + 'description': u'Bounding box overlap 2', + '#fid': 7L, + '#geometry': 'LINESTRING(28.0 29.0, 31.0 29.0, 31.0 33.0)', + }, + 1002L: { + 'id': u'1', + 'description': u'Inside', + '#fid': 2L, + '#geometry': 'LINESTRING(12.0 32.0, 28.0 48.0)', + }, + 1003L: { + 'id': u'2', + 'description': u'Outside', + '#fid': 3L, + '#geometry': 'LINESTRING(0.0 0.0, 0.0 10.0)', + }, + 1004L: { + 'id': u'3', + 'description': u'Crossing', + '#fid': 4L, + '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', + }, + 1005L: { + 'id': u'4', + 'description': u'Bounding box overlap', + '#fid': 5L, + '#geometry': 'LINESTRING(5.0 30.0, 5.0 55.0, 30.0 55.0)', + }, + 1006L: { + 'id': u'5', + 'description': u'Crossing 2', + '#fid': 6L, + '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', + }, + 1007L: { + 'id': u'6', + 'description': u'Bounding box overlap 2', + '#fid': 7L, + '#geometry': 'LINESTRING(28.0 29.0, 31.0 29.0, 31.0 33.0)', + }, + 3002L: { + 'id': u'1', + 'description': u'Inside', + '#fid': 2L, + '#geometry': 'LINESTRING(12.0 32.0, 28.0 48.0)', + }, + 3004L: { + 'id': u'3', + 'description': u'Crossing', + '#fid': 4L, + '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', + }, + 3005L: { + 'id': u'4', + 'description': u'Bounding box overlap', + '#fid': 5L, + '#geometry': 'LINESTRING(5.0 30.0, 5.0 55.0, 30.0 55.0)', + }, + 3006L: { + 'id': u'5', + 'description': u'Crossing 2', + '#fid': 6L, + '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', + }, + 3007L: { + 'id': u'6', + 'description': u'Bounding box overlap 2', + '#fid': 7L, + '#geometry': 'LINESTRING(28.0 29.0, 31.0 29.0, 31.0 33.0)', + }, + 4002L: { + 'id': u'1', + 'description': u'Inside', + '#fid': 2L, + '#geometry': 'LINESTRING(12.0 32.0, 28.0 48.0)', + }, + 4004L: { + 'id': u'3', + 
'description': u'Crossing', + '#fid': 4L, + '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', + }, + 4006L: { + 'id': u'5', + 'description': u'Crossing 2', + '#fid': 6L, + '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', + }, + 6002L: { + 'id': u'1', + 'description': u'Inside', + '#fid': 2L, + '#geometry': 'LINESTRING(12.0 32.0, 28.0 48.0)', + }, + 6003L: { + 'id': u'2', + 'description': u'Outside', + '#fid': 3L, + '#geometry': 'LINESTRING(0.0 0.0, 0.0 10.0)', + }, + 6004L: { + 'id': u'3', + 'description': u'Crossing', + '#fid': 4L, + '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', + }, + 6005L: { + 'id': u'4', + 'description': u'Bounding box overlap', + '#fid': 5L, + '#geometry': 'LINESTRING(5.0 30.0, 5.0 55.0, 30.0 55.0)', + }, + 6006L: { + 'id': u'5', + 'description': u'Crossing 2', + '#fid': 6L, + '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', + }, + 6007L: { + 'id': u'6', + 'description': u'Bounding box overlap 2', + '#fid': 7L, + '#geometry': 'LINESTRING(28.0 29.0, 31.0 29.0, 31.0 33.0)', + }, + 7002L: { + 'id': u'1', + 'description': u'Inside', + '#fid': 2L, + '#geometry': 'LINESTRING(12.0 32.0, 28.0 48.0)', + }, + 7003L: { + 'id': u'2', + 'description': u'Outside', + '#fid': 3L, + '#geometry': 'LINESTRING(0.0 0.0, 0.0 10.0)', + }, + 7004L: { + 'id': u'3', + 'description': u'Crossing', + '#fid': 4L, + '#geometry': 'LINESTRING(5.0 30.0, 30.0 55.0)', + }, + 7005L: { + 'id': u'4', + 'description': u'Bounding box overlap', + '#fid': 5L, + '#geometry': 'LINESTRING(5.0 30.0, 5.0 55.0, 30.0 55.0)', + }, + 7006L: { + 'id': u'5', + 'description': u'Crossing 2', + '#fid': 6L, + '#geometry': 'LINESTRING(25.0 35.0, 35.0 35.0)', + }, + 7007L: { + 'id': u'6', + 'description': u'Bounding box overlap 2', + '#fid': 7L, + '#geometry': 'LINESTRING(28.0 29.0, 31.0 29.0, 31.0 33.0)', + }, + } + wanted['log']=[ + 'Request 5 did not return any data', + ] + return wanted + +def test_033_reset_subset_string(): + wanted={} + wanted['uri']=u'file://test.csv?geomType=none&type=csv' + 
wanted['data']={ + 2L: { + 'id': u'1', + 'description': u'Basic unquoted record', + 'data': u'Some data', + 'info': u'Some info', + 'field_5': u'', + '#fid': 2L, + '#geometry': 'None', + }, + 3L: { + 'id': u'2', + 'description': u'Quoted field', + 'data': u'Quoted data', + 'info': u'Unquoted', + 'field_5': u'', + '#fid': 3L, + '#geometry': 'None', + }, + 4L: { + 'id': u'3', + 'description': u'Escaped quotes', + 'data': u'Quoted "citation" data', + 'info': u'Unquoted', + 'field_5': u'', + '#fid': 4L, + '#geometry': 'None', + }, + 5L: { + 'id': u'4', + 'description': u'Quoted newlines', + 'data': u'Line 1\nLine 2\n\nLine 4', + 'info': u'No data', + 'field_5': u'', + '#fid': 5L, + '#geometry': 'None', + }, + 9L: { + 'id': u'5', + 'description': u'Extra fields', + 'data': u'data', + 'info': u'info', + 'field_5': u'message', + '#fid': 9L, + '#geometry': 'None', + }, + 10L: { + 'id': u'6', + 'description': u'Missing fields', + 'data': u'', + 'info': u'', + 'field_5': u'', + '#fid': 10L, + '#geometry': 'None', + }, + 2002L: { + 'id': u'1', + 'description': u'Basic unquoted record', + 'data': u'Some data', + 'info': u'Some info', + 'field_5': u'', + '#fid': 2L, + '#geometry': 'None', + }, + 2004L: { + 'id': u'3', + 'description': u'Escaped quotes', + 'data': u'Quoted "citation" data', + 'info': u'Unquoted', + 'field_5': u'', + '#fid': 4L, + '#geometry': 'None', + }, + 2009L: { + 'id': u'5', + 'description': u'Extra fields', + 'data': u'data', + 'info': u'info', + 'field_5': u'message', + '#fid': 9L, + '#geometry': 'None', + }, + 4010L: { + 'id': u'6', + 'description': u'Missing fields', + 'data': u'', + 'info': u'', + 'field_5': u'', + '#fid': 10L, + '#geometry': 'None', + }, + 6004L: { + 'id': u'3', + 'description': u'Escaped quotes', + 'data': u'Quoted "citation" data', + 'info': u'Unquoted', + 'field_5': u'', + '#fid': 4L, + '#geometry': 'None', + }, + 8002L: { + 'id': u'1', + 'description': u'Basic unquoted record', + 'data': u'Some data', + 'info': u'Some info', + 
'field_5': u'', + '#fid': 2L, + '#geometry': 'None', + }, + 8004L: { + 'id': u'3', + 'description': u'Escaped quotes', + 'data': u'Quoted "citation" data', + 'info': u'Unquoted', + 'field_5': u'', + '#fid': 4L, + '#geometry': 'None', + }, + 8009L: { + 'id': u'5', + 'description': u'Extra fields', + 'data': u'data', + 'info': u'info', + 'field_5': u'message', + '#fid': 9L, + '#geometry': 'None', + }, + 10003L: { + 'id': u'2', + 'description': u'Quoted field', + 'data': u'Quoted data', + 'info': u'Unquoted', + 'field_5': u'', + '#fid': 3L, + '#geometry': 'None', + }, + 10005L: { + 'id': u'4', + 'description': u'Quoted newlines', + 'data': u'Line 1\nLine 2\n\nLine 4', + 'info': u'No data', + 'field_5': u'', + '#fid': 5L, + '#geometry': 'None', + }, + 10010L: { + 'id': u'6', + 'description': u'Missing fields', + 'data': u'', + 'info': u'', + 'field_5': u'', + '#fid': 10L, + '#geometry': 'None', + }, + } + wanted['log']=[ + ] + return wanted +