Skip to content

Commit

Permalink
Merge pull request qgis#582 from ccrook/delimited_text_indexing_imple…
Browse files Browse the repository at this point in the history
…mented

Delimited text provider indexing efficiency improvements.
  • Loading branch information
timlinux committed May 12, 2013
2 parents 724ed72 + 2c37896 commit d8dac0c
Show file tree
Hide file tree
Showing 11 changed files with 3,092 additions and 1,548 deletions.
7 changes: 5 additions & 2 deletions resources/context_help/QgsDelimitedTextSourceSelect-en_US
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ or are duplicated.
</p>
<p>
In addition to the attributes explicitly in the data file QGIS assigns a unique
feature id to each record. This is the line number in the source file on which
feature id to each record which is the line number in the source file on which
the record starts.
</p>
<p>
Expand Down Expand Up @@ -275,7 +275,10 @@ The following options can be added
<li><tt>crs=...</tt> specifies the coordinate system to use for the vector layer, in a format accepted by QgsCoordinateReferenceSystem.createFromString (for example &quot;EPSG:4167&quot;). If this is not
specified then a dialog box may request this information from the user
when the layer is loaded (depending on QGIS CRS settings).</li>
<li><tt>quiet=(yes|no)</tt> specifies whether errors encountered loading the layer are presented in a dialog box (they will be written to the QGIS log in any case). The default is no.</li>
<li><tt>subsetIndex=(yes|no)</tt> specifies whether the provider should build an index defining the subset during the initial file scan. The index applies both to explicitly defined subsets and to the implicit subset of features for which the geometry definition is valid. By default the subset index is built if it is applicable. This option is not available from the GUI.</li>
<li><tt>spatialIndex=(yes|no)</tt> specifies whether the provider should build a spatial index during the initial file scan. By default the spatial index is not built. </li>
<li><tt>useWatcher=(yes|no)</tt> specifies whether the provider should use a file system watcher to monitor for changes to the file. By default the file is monitored for changes. This option is not available from the GUI.</li>
<li><tt>quiet=(yes|no)</tt> specifies whether errors encountered loading the layer are presented in a dialog box (they will be written to the QGIS log in any case). The default is no. This option is not available from the GUI.</li>
</ul>


14 changes: 14 additions & 0 deletions src/core/qgsvectorlayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,20 @@ struct CORE_EXPORT QgsVectorJoinInfo
* Defines the coordinate reference system used for the layer. This can be
* any string accepted by QgsCoordinateReferenceSystem::createFromString()
*
* -subsetIndex=(yes|no)
*
* Determines whether the provider generates an index to improve the efficiency
* of subsets. The default is yes.
*
* -spatialIndex=(yes|no)
*
* Determines whether the provider generates a spatial index. The default is no.
*
* -useWatcher=(yes|no)
*
* Defines whether the file will be monitored for changes. The default is
* to monitor for changes.
*
* - quiet
*
* Errors encountered loading the file will not be reported in a user dialog if
Expand Down
1 change: 1 addition & 0 deletions src/providers/delimitedtext/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ SET (DTEXT_SRCS
)

SET (DTEXT_MOC_HDRS
qgsdelimitedtextfile.h
qgsdelimitedtextprovider.h
qgsdelimitedtextsourceselect.h
)
Expand Down
170 changes: 168 additions & 2 deletions src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,13 @@
#include "qgsdelimitedtextprovider.h"
#include "qgsdelimitedtextfile.h"

#include "qgsexpression.h"
#include "qgsgeometry.h"
#include "qgslogger.h"
#include "qgsmessagelog.h"
#include "qgsspatialindex.h"

#include <QtAlgorithms>
#include <QTextStream>

QgsDelimitedTextFeatureIterator::QgsDelimitedTextFeatureIterator( QgsDelimitedTextProvider* p, const QgsFeatureRequest& request )
Expand All @@ -32,6 +36,106 @@ QgsDelimitedTextFeatureIterator::QgsDelimitedTextFeatureIterator( QgsDelimitedTe
}
P->mActiveIterator = this;

// Determine mode to use based on request...

QgsDebugMsg( "Setting up QgsDelimitedTextIterator" );

// Does the layer have geometry - will revise later to determine if we actually need to
// load it.
mLoadGeometry = P->mGeomRep != QgsDelimitedTextProvider::GeomNone;

// Does the layer have an explicit or implicit subset (implicit subset is if we have geometry which can
// be invalid)

mTestSubset = P->mSubsetExpression;
mTestGeometry = false;

mMode = FileScan;
if ( request.filterType() == QgsFeatureRequest::FilterFid )
{
QgsDebugMsg( "Configuring for returning single id" );
mFeatureIds.append( request.filterFid() );
mMode = FeatureIds;
mTestSubset = false;
}
// If have geometry and testing geometry then evaluate options...
// If we don't have geometry then all records pass geometry filter.
// CC: 2013-05-09
// Not sure about intended relationship between filtering on geometry and
// requesting no geometry? Have preserved current logic of ignoring spatial filter
// if not requesting geometry.

else if ( request.filterType() == QgsFeatureRequest::FilterRect && mLoadGeometry
&& !( mRequest.flags() & QgsFeatureRequest::NoGeometry ) )
{
QgsDebugMsg( "Configuring for rectangle select" );
mTestGeometry = true;
// Exact intersection test only applies for WKT geometries
mTestGeometryExact = mRequest.flags() & QgsFeatureRequest::ExactIntersect
&& P->mGeomRep == QgsDelimitedTextProvider::GeomAsWkt;

QgsRectangle rect = request.filterRect();

// If request doesn't overlap extents, then nothing to return
if ( ! rect.intersects( P->extent() ) )
{
QgsDebugMsg( "Rectangle outside layer extents - no features to return" );
mMode = FeatureIds;
}
// If the request extents include the entire layer, then revert to
// a file scan

else if ( rect.contains( P->extent() ) )
{
QgsDebugMsg( "Rectangle contains layer extents - bypass spatial filter" );
mTestGeometry = false;
}
// If we have a spatial index then use it. The spatial index already accounts
// for the subset. Also means we don't have to test geometries unless doing exact
// intersection

else if ( P->mUseSpatialIndex )
{
mFeatureIds = P->mSpatialIndex->intersects( rect );
// Sort for efficient sequential retrieval
qSort(mFeatureIds.begin(), mFeatureIds.end());
QgsDebugMsg( QString("Layer has spatial index - selected %1 features from index").arg(mFeatureIds.size()) );
mMode = FeatureIds;
mTestSubset = false;
mTestGeometry = mTestGeometryExact;
}
}

// If we have a subset index then use it..
if ( mMode == FileScan && P->mUseSubsetIndex )
{
QgsDebugMsg( QString("Layer has subset index - use %1 items from subset index").arg(P->mSubsetIndex.size()) );
mTestSubset = false;
mMode = SubsetIndex;
}

// Otherwise just have to scan the file
if( mMode == FileScan )
{
QgsDebugMsg( "File will be scanned for desired features" );
}

// If the request does not require geometry, can we avoid loading it?
// We need it if we are testing geometry (ie spatial filter), or
// if testing the subset expression, and it uses geometry.
if ( mRequest.flags() & QgsFeatureRequest::NoGeometry &&
! mTestGeometry &&
! ( mTestSubset && P->mSubsetExpression->needsGeometry() ) )
{
QgsDebugMsg( "Feature geometries not required" );
mLoadGeometry = false;
}

QgsDebugMsg( QString("Iterator is scanning file: ") + (scanningFile() ? "Yes" : "No"));
QgsDebugMsg( QString("Iterator is loading geometries: ") + (loadGeometry() ? "Yes" : "No"));
QgsDebugMsg( QString("Iterator is testing geometries: ") + (testGeometry() ? "Yes" : "No"));
QgsDebugMsg( QString("Iterator is testing subset: ") + (testSubset() ? "Yes" : "No"));

rewind();
}

Expand All @@ -49,8 +153,40 @@ bool QgsDelimitedTextFeatureIterator::nextFeature( QgsFeature& feature )
if ( mClosed )
return false;

bool gotFeature = P->nextFeature( feature, P->mFile, mRequest );
bool gotFeature = false;
if ( mMode == FileScan )
{
gotFeature = P->nextFeature( feature, P->mFile, this );
}
else
{
while( ! gotFeature )
{
qint64 fid = -1;
if ( mMode == FeatureIds )
{
if( mNextId < mFeatureIds.size() )
{
fid = mFeatureIds[mNextId];
}
}
else if( mNextId < P->mSubsetIndex.size() )
{
fid = P->mSubsetIndex[mNextId];
}
if( fid < 0 ) break;
mNextId++;
gotFeature = (P->setNextFeatureId( fid ) && P->nextFeature( feature, P->mFile, this ));
}
}

// CC: 2013-05-08: What is the intent of rewind/close. The following
// line from previous implementation means that we cannot rewind the iterator
// after reading last record? Is this correct? This line can be removed if
// not.

if ( ! gotFeature ) close();

return gotFeature;
}

Expand All @@ -60,7 +196,14 @@ bool QgsDelimitedTextFeatureIterator::rewind()
return false;

// Skip to first data record
P->resetStream();
if ( mMode == FileScan )
{
P->resetStream();
}
else
{
mNextId = 0;
}
return true;
}

Expand All @@ -71,6 +214,29 @@ bool QgsDelimitedTextFeatureIterator::close()

// tell provider that this iterator is not active anymore
P->mActiveIterator = 0;
mFeatureIds = QList<QgsFeatureId>();
mClosed = true;
return true;
}

/**
 * Decide whether a point passes the iterator's spatial filter.
 *
 * When no geometry test is active every point is accepted; otherwise the
 * point must be contained in the request's filter rectangle.
 */
bool QgsDelimitedTextFeatureIterator::wantGeometry( const QgsPoint &pt ) const
{
  // Short-circuit keeps the rectangle test from running when filtering is off.
  const bool accepted = ! mTestGeometry || mRequest.filterRect().contains( pt );
  return accepted;
}

/**
 * Decide whether a geometry passes the iterator's spatial filter.
 *
 * When no geometry test is active every geometry is accepted. Otherwise the
 * geometry is compared with the request's filter rectangle: an exact
 * intersection test when mTestGeometryExact is set, a bounding-box
 * intersection test when it is not.
 */
bool QgsDelimitedTextFeatureIterator::wantGeometry( QgsGeometry *geom ) const
{
  // No spatial filter active: nothing to test.
  if ( ! mTestGeometry )
  {
    return true;
  }

  return mTestGeometryExact
         ? geom->intersects( mRequest.filterRect() )
         : geom->boundingBox().intersects( mRequest.filterRect() );
}
29 changes: 29 additions & 0 deletions src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,20 @@
#ifndef QGSDELIMITEDTEXTFEATUREITERATOR_H
#define QGSDELIMITEDTEXTFEATUREITERATOR_H

#include <QList>
#include "qgsfeatureiterator.h"
#include "qgsfeature.h"

class QgsDelimitedTextProvider;

class QgsDelimitedTextFeatureIterator : public QgsAbstractFeatureIterator
{
// Strategy used by the iterator to locate the records it returns.
enum IteratorMode
{
// Records are read straight from the provider's file; rewinding resets the
// provider's stream.
FileScan,
// Feature ids are taken in order from the provider's subset index
// (P->mSubsetIndex).
SubsetIndex,
// Feature ids are taken in order from the iterator's own mFeatureIds list
// (a single requested id, or the ids selected from the spatial index; the
// list is left empty when the filter rectangle misses the layer extent).
FeatureIds
};
public:
QgsDelimitedTextFeatureIterator( QgsDelimitedTextProvider* p, const QgsFeatureRequest& request );

Expand All @@ -35,8 +43,29 @@ class QgsDelimitedTextFeatureIterator : public QgsAbstractFeatureIterator
//! end of iterating: free the resources / lock
virtual bool close();

// Flags used by nextFeature function of QgsDelimitedTextProvider
// True if the subset test is still to be applied to each record (it is
// switched off when features come from a single id, the spatial index or
// the subset index).
bool testSubset() const { return mTestSubset; }
// True if the request's rectangle filter has to be tested against each
// record's geometry.
bool testGeometry() const { return mTestGeometry; }
// True if geometries have to be loaded for the records returned.
bool loadGeometry() const { return mLoadGeometry; }
// True if the request asks for a subset of attributes and the subset test
// is not being applied. NOTE(review): presumably all attributes are loaded
// while testing the subset because the expression may use any of them -
// confirm against the provider.
bool loadSubsetOfAttributes() const { return ! mTestSubset && mRequest.flags() & QgsFeatureRequest::SubsetOfAttributes;}
// True if the iterator is in FileScan mode rather than stepping through a
// list of feature ids.
bool scanningFile() const { return mMode == FileScan; }

// Pass through attribute subset
const QgsAttributeList &subsetOfAttributes() const { return mRequest.subsetOfAttributes(); }

// Tests whether the geometry is required, given that testGeometry is true.
bool wantGeometry( const QgsPoint & point ) const;
bool wantGeometry( QgsGeometry *geom ) const;

protected:
// Provider whose file and indexes this iterator reads from.
QgsDelimitedTextProvider* P;
// Feature ids to return when mMode is FeatureIds; cleared by close().
QList<QgsFeatureId> mFeatureIds;
// Strategy selected in the constructor for locating records.
IteratorMode mMode;
// Position of the next id to fetch from mFeatureIds or P->mSubsetIndex;
// reset to 0 by rewind() when not scanning the file.
long mNextId;
// Whether the subset test must be applied to each record.
bool mTestSubset;
// Whether each record's geometry must be tested against the filter rectangle.
bool mTestGeometry;
// Whether the geometry test is an exact intersection rather than a
// bounding box test; only assigned when the request has a rectangle filter.
bool mTestGeometryExact;
// Whether geometries need to be loaded for the records returned.
bool mLoadGeometry;
};


Expand Down
Loading

0 comments on commit d8dac0c

Please sign in to comment.