Skip to content

Commit

Permalink
Add efficient VSIRmdirRecursive() implementation for /vsis3/
Browse files Browse the repository at this point in the history
  • Loading branch information
rouault committed Feb 25, 2020
1 parent b1ed169 commit 8a702a1
Show file tree
Hide file tree
Showing 5 changed files with 202 additions and 40 deletions.
90 changes: 90 additions & 0 deletions autotest/gcore/vsis3.py
Original file line number Diff line number Diff line change
Expand Up @@ -1427,6 +1427,96 @@ def method(request):
ret = gdal.UnlinkBatch(['/vsis3/unlink_batch/foo'])
assert not ret

###############################################################################
# Test RmdirRecursive() with a fake AWS server


def test_vsis3_rmdir_recursive():
    """Exercise gdal.RmdirRecursive() on /vsis3/ against the fake web server.

    The server first answers a listing request for the 'somedir/' prefix with
    three keys, then expects two bulk-delete POST requests of two keys each
    (the batch size is forced to 2 through CPL_VSIS3_UNLINK_BATCH_SIZE). The
    last batch ends with the key of the directory itself.
    """

    if gdaltest.webserver_port == 0:
        pytest.skip()

    def make_delete_handler(expected_body, reply_body):
        # Build a request callback that accepts exactly one bulk-delete
        # payload and answers with the given DeleteResult document.
        def handle(request):
            length = int(request.headers['Content-Length'])
            received = request.rfile.read(length).decode('ascii')
            if received != expected_body:
                sys.stderr.write('Did not get expected content: %s\n' % received)
                request.send_response(403)
                return

            request.protocol_version = 'HTTP/1.1'
            request.send_response(200)
            request.send_header('Content-Length', len(reply_body))
            request.send_header('Connection', 'close')
            request.end_headers()
            request.wfile.write(reply_body.encode('ascii'))

        return handle

    # Listing returned for the directory being removed.
    listing = """<?xml version="1.0" encoding="UTF-8"?>
<ListBucketResult>
<Prefix>somedir/</Prefix>
<Marker/>
<Contents>
<Key>somedir/test.txt</Key>
<LastModified>1970-01-01T00:00:01.000Z</LastModified>
<Size>40</Size>
</Contents>
<Contents>
<Key>somedir/subdir/</Key>
<LastModified>1970-01-01T00:00:01.000Z</LastModified>
<Size>0</Size>
</Contents>
<Contents>
<Key>somedir/subdir/test.txt</Key>
<LastModified>1970-01-01T00:00:01.000Z</LastModified>
<Size>5</Size>
</Contents>
</ListBucketResult>
"""

    # First batch: the first two listed keys.
    first_request = """<?xml version="1.0" encoding="UTF-8"?>
<Delete xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
<Object>
<Key>somedir/test.txt</Key>
</Object>
<Object>
<Key>somedir/subdir/</Key>
</Object>
</Delete>
"""
    first_reply = """<DeleteResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Deleted><Key>somedir/test.txt</Key></Deleted><Deleted><Key>somedir/subdir/</Key></Deleted></DeleteResult>"""

    # Second batch: the remaining key followed by the directory itself.
    second_request = """<?xml version="1.0" encoding="UTF-8"?>
<Delete xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
<Object>
<Key>somedir/subdir/test.txt</Key>
</Object>
<Object>
<Key>somedir/</Key>
</Object>
</Delete>
"""
    second_reply = """<DeleteResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Deleted><Key>somedir/subdir/test.txt</Key></Deleted><Deleted><Key>somedir/</Key></Deleted></DeleteResult>"""

    # Requests are registered in the order in which they are expected.
    handler = webserver.SequentialHandler()
    handler.add('GET', '/test_rmdir_recursive/?prefix=somedir%2F', 200,
                {'Content-type': 'application/xml'}, listing)
    handler.add('POST', '/test_rmdir_recursive/?delete',
                custom_method=make_delete_handler(first_request, first_reply))
    handler.add('POST', '/test_rmdir_recursive/?delete',
                custom_method=make_delete_handler(second_request, second_reply))

    with gdaltest.config_option('CPL_VSIS3_UNLINK_BATCH_SIZE', '2'), \
            webserver.install_http_handler(handler):
        assert gdal.RmdirRecursive('/vsis3/test_rmdir_recursive/somedir') == 0


###############################################################################
# Test multipart upload with a fake AWS server

Expand Down
1 change: 1 addition & 0 deletions gdal/port/cpl_vsi_virtual.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ class CPL_DLL VSIFilesystemHandler {
{(void)pszDirname; (void)nMode; errno=ENOENT; return -1;}
virtual int Rmdir( const char *pszDirname )
{ (void) pszDirname; errno=ENOENT; return -1; }
virtual int RmdirRecursive( const char *pszDirname );
virtual char **ReadDir( const char *pszDirname )
{ (void) pszDirname; return nullptr; }
virtual char **ReadDirEx( const char *pszDirname, int /* nMaxFiles */ )
Expand Down
84 changes: 49 additions & 35 deletions gdal/port/cpl_vsil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -702,14 +702,17 @@ int VSIRmdir( const char * pszDirname )
}

/************************************************************************/
/* VSIRmdir() */
/* VSIRmdirRecursive() */
/************************************************************************/

/**
* \brief Delete a directory recursively
*
* Deletes a directory object and its content from the file system.
*
* Starting with GDAL 3.1, /vsis3/ has an efficient implementation of this
* function.
*
* @return 0 on success or -1 on an error.
* @since GDAL 2.3
*/
Expand All @@ -721,40 +724,9 @@ int VSIRmdirRecursive( const char* pszDirname )
{
return -1;
}
char** papszFiles = VSIReadDir(pszDirname);
for( char** papszIter = papszFiles; papszIter && *papszIter; ++papszIter )
{
if( (*papszIter)[0] == '\0' ||
strcmp(*papszIter, ".") == 0 ||
strcmp(*papszIter, "..") == 0 )
{
continue;
}
VSIStatBufL sStat;
const CPLString osFilename(
CPLFormFilename(pszDirname, *papszIter, nullptr));
if( VSIStatL(osFilename, &sStat) == 0 )
{
if( VSI_ISDIR(sStat.st_mode) )
{
if( VSIRmdirRecursive(osFilename) != 0 )
{
CSLDestroy(papszFiles);
return -1;
}
}
else
{
if( VSIUnlink(osFilename) != 0 )
{
CSLDestroy(papszFiles);
return -1;
}
}
}
}
CSLDestroy(papszFiles);
return VSIRmdir(pszDirname);
VSIFilesystemHandler *poFSHandler =
VSIFileManager::GetHandler( pszDirname );
return poFSHandler->RmdirRecursive( pszDirname );
}

/************************************************************************/
Expand Down Expand Up @@ -1441,6 +1413,48 @@ int* VSIFilesystemHandler::UnlinkBatch( CSLConstList papszFiles )
return panRet;
}

/************************************************************************/
/* RmdirRecursive() */
/************************************************************************/

int VSIFilesystemHandler::RmdirRecursive( const char* pszDirname )
{
char** papszFiles = VSIReadDir(pszDirname);
for( char** papszIter = papszFiles; papszIter && *papszIter; ++papszIter )
{
if( (*papszIter)[0] == '\0' ||
strcmp(*papszIter, ".") == 0 ||
strcmp(*papszIter, "..") == 0 )
{
continue;
}
VSIStatBufL sStat;
const CPLString osFilename(
CPLFormFilename(pszDirname, *papszIter, nullptr));
if( VSIStatL(osFilename, &sStat) == 0 )
{
if( VSI_ISDIR(sStat.st_mode) )
{
if( RmdirRecursive(osFilename) != 0 )
{
CSLDestroy(papszFiles);
return -1;
}
}
else
{
if( VSIUnlink(osFilename) != 0 )
{
CSLDestroy(papszFiles);
return -1;
}
}
}
}
CSLDestroy(papszFiles);
return VSIRmdir(pszDirname);
}

#endif

/************************************************************************/
Expand Down
57 changes: 54 additions & 3 deletions gdal/port/cpl_vsil_s3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ struct VSIDIRS3: public VSIDIR
IVSIS3LikeFSHandler* poS3FS = nullptr;
IVSIS3LikeHandleHelper* poS3HandleHelper = nullptr;
int nMaxFiles = 0;
bool bCacheEntries = true;

explicit VSIDIRS3(IVSIS3LikeFSHandler *poFSIn): poFS(poFSIn), poS3FS(poFSIn) {}
explicit VSIDIRS3(VSICurlFilesystemHandler *poFSIn): poFS(poFSIn) {}
Expand Down Expand Up @@ -251,7 +252,7 @@ bool VSIDIRS3::AnalyseS3FileList(
entry->bMTimeKnown = true;
}

if( nMaxFiles != 1 )
if( nMaxFiles != 1 && bCacheEntries )
{
FileProp prop;
prop.eExists = EXIST_YES;
Expand Down Expand Up @@ -301,7 +302,7 @@ bool VSIDIRS3::AnalyseS3FileList(
entry->nMode = S_IFDIR;
entry->bModeKnown = true;

if( nMaxFiles != 1 )
if( nMaxFiles != 1 && bCacheEntries )
{
FileProp prop;
prop.eExists = EXIST_YES;
Expand Down Expand Up @@ -350,7 +351,7 @@ bool VSIDIRS3::AnalyseS3FileList(
entry->nMode = S_IFDIR;
entry->bModeKnown = true;

if( nMaxFiles != 1 )
if( nMaxFiles != 1 && bCacheEntries )
{
FileProp prop;
prop.eExists = EXIST_YES;
Expand Down Expand Up @@ -590,6 +591,7 @@ class VSIS3FSHandler final : public IVSIS3LikeFSHandler
char* GetSignedURL( const char* pszFilename, CSLConstList papszOptions ) override;

int* UnlinkBatch( CSLConstList papszFiles ) override;
int RmdirRecursive( const char* pszDirname ) override;
};

/************************************************************************/
Expand Down Expand Up @@ -1956,6 +1958,53 @@ int* VSIS3FSHandler::UnlinkBatch( CSLConstList papszFiles )
return panRet;
}

/************************************************************************/
/* RmdirRecursive() */
/************************************************************************/

/**
 * Recursive directory removal for /vsis3/ based on batched deletions.
 *
 * The content below pszDirname is enumerated with OpenDir() (entry caching
 * disabled through CACHE_ENTRIES=FALSE) and the collected names are handed to
 * UnlinkBatch() in groups. The key of the directory itself, with a trailing
 * slash, is appended to the last group.
 *
 * @param pszDirname directory to remove, with or without a trailing slash.
 * @return 0 when every batch was accepted and every file in it was reported
 *         as deleted, -1 otherwise (including when the directory cannot be
 *         opened).
 */
int VSIS3FSHandler::RmdirRecursive( const char* pszDirname )
{
    CPLString osDirnameWithoutEndSlash(pszDirname);
    if( !osDirnameWithoutEndSlash.empty() &&
        osDirnameWithoutEndSlash.back() == '/' )
    {
        osDirnameWithoutEndSlash.resize( osDirnameWithoutEndSlash.size() - 1 );
    }

    CPLStringList aosOptions;
    aosOptions.SetNameValue("CACHE_ENTRIES", "FALSE");
    // NOTE(review): -1 is presumably "unlimited recursion depth" - confirm
    // against the OpenDir() contract.
    auto poDir = std::unique_ptr<VSIDIR>(
        OpenDir(osDirnameWithoutEndSlash, -1, aosOptions.List()));
    if( !poDir )
        return -1;

    CPLStringList aosList;
    // For debug / testing only
    const int nBatchSize =
        atoi(CPLGetConfigOption("CPL_VSIS3_UNLINK_BATCH_SIZE", "1000"));

    int nRet = 0;
    while( true )
    {
        auto entry = poDir->NextDirEntry();
        if( entry )
        {
            CPLString osFilename(
                osDirnameWithoutEndSlash + '/' + entry->pszName);
            // Directory entries are addressed with a trailing slash.
            if( entry->nMode == S_IFDIR )
                osFilename += '/';
            aosList.AddString(osFilename);
        }
        if( entry == nullptr || aosList.size() == nBatchSize )
        {
            if( entry == nullptr && !osDirnameWithoutEndSlash.empty() )
            {
                // End of listing: also remove the directory itself.
                aosList.AddString( (osDirnameWithoutEndSlash + '/').c_str() );
            }
            const int nCount = aosList.size();
            int* panRet = UnlinkBatch(aosList.List());
            if( panRet == nullptr )
            {
                nRet = -1;
                break;
            }
            // UnlinkBatch() reports one status per submitted file: a batch
            // that was accepted as a whole may still contain files that could
            // not be deleted, which must not be reported as a success.
            for( int i = 0; i < nCount; ++i )
            {
                if( !panRet[i] )
                    nRet = -1;
            }
            CPLFree(panRet);
            aosList.Clear();
            if( nRet != 0 )
                break;
        }
        if( entry == nullptr )
            break;
    }

    // Invalidate cached information even on failure: earlier batches may
    // already have removed part of the content.
    PartialClearCache(osDirnameWithoutEndSlash);
    return nRet;
}

/************************************************************************/
/* DeleteObjects() */
/************************************************************************/
Expand Down Expand Up @@ -2760,6 +2809,8 @@ VSIDIR* IVSIS3LikeFSHandler::OpenDir( const char *pszPath,
dir->osBucket = osBucket;
dir->osObjectKey = osObjectKey;
dir->nMaxFiles = atoi(CSLFetchNameValueDef(papszOptions, "MAXFILES", "0"));
dir->bCacheEntries = CPLTestBool(
CSLFetchNameValueDef(papszOptions, "CACHE_ENTRIES", "TRUE"));
if( !dir->IssueListDir() )
{
delete dir;
Expand Down
10 changes: 8 additions & 2 deletions gdal/swig/python/samples/gdal_rmdir.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,16 @@ def Usage():
def gdal_rm(argv, progress=None):
# pylint: disable=unused-argument
filename = None
recursive = False

argv = gdal.GeneralCmdLineProcessor(argv)
if argv is None:
return -1

for i in range(1, len(argv)):
if filename is None:
if argv[i] == '-r':
recursive = True
elif filename is None:
filename = argv[i]
elif argv[i][0] == '-':
print('Unexpected option : %s' % argv[i])
Expand All @@ -59,7 +62,10 @@ def gdal_rm(argv, progress=None):
if filename is None:
return Usage()

ret = gdal.Rmdir(filename)
if recursive:
ret = gdal.RmdirRecursive(filename)
else:
ret = gdal.Rmdir(filename)
if ret != 0:
print('Deletion failed')
return ret
Expand Down

0 comments on commit 8a702a1

Please sign in to comment.