Skip to content

Commit

Permalink
Add fsync and hard link support, start using it in snapshots
Browse files Browse the repository at this point in the history
* Add two new `HasBlockIO` primitives for `fsync`, one for synchronising files,
  one for directories. These are separate, since we can not synchronise
  directories on `Windows`.

* Add one new `HasBlockIO` primitive for creating hard links.

* Add `IO` specialisations to `blockio-api`.

* Use the new `HasBlockIO` primitives in the snapshot implementation. Note that
  when we create a snapshot, that we make *both* the hard links and their parent
  directory (the named snapshot directory) durable.

Co-authored-by: Recursion Ninja <[email protected]>
  • Loading branch information
jorisdral and recursion-ninja committed Dec 9, 2024
1 parent 82df7e4 commit a6e28c7
Show file tree
Hide file tree
Showing 13 changed files with 349 additions and 65 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ key-value store.

## System requirements

This library only supports 64-bit, little-endian systems.
This library only supports 64-bit, little-endian systems. On Windows, the
library only works probably on drives with NTFS.

Provide the -threaded flag to executables, test suites and benchmark suites if
you use this library on Linux systems.
17 changes: 15 additions & 2 deletions blockio-api/src-linux/System/FS/BlockIO/Async.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,13 @@ asyncHasBlockIO ::
-> (Handle HandleIO -> FileOffset -> FileOffset -> API.Advice -> IO ())
-> (Handle HandleIO -> FileOffset -> FileOffset -> IO ())
-> (FsPath -> LockMode -> IO (Maybe (API.LockFileHandle IO)))
-> (Handle HandleIO -> IO ())
-> (FsPath -> IO ())
-> (FsPath -> FsPath -> IO ())
-> HasFS IO HandleIO
-> API.IOCtxParams
-> IO (API.HasBlockIO IO HandleIO)
asyncHasBlockIO hSetNoCache hAdvise hAllocate tryLockFile hasFS ctxParams = do
asyncHasBlockIO hSetNoCache hAdvise hAllocate tryLockFile hSynchronise synchroniseDirectory createHardLink hasFS ctxParams = do
ctx <- I.initIOCtx (ctxParamsConv ctxParams)
pure $ API.HasBlockIO {
API.close = I.closeIOCtx ctx
Expand All @@ -44,6 +47,9 @@ asyncHasBlockIO hSetNoCache hAdvise hAllocate tryLockFile hasFS ctxParams = do
, API.hAdvise
, API.hAllocate
, API.tryLockFile
, API.hSynchronise
, API.synchroniseDirectory
, API.createHardLink
}

ctxParamsConv :: API.IOCtxParams -> I.IOCtxParams
Expand Down Expand Up @@ -110,11 +116,18 @@ ioopConv (IOOpWrite h off buf bufOff c) = handleFd h >>= \fd ->
--
-- TODO: if the handle were to have a reader/writer lock, then we could take the
-- reader lock in 'submitIO'. However, the current implementation of 'Handle'
-- only allows mutally exclusive access to the underlying file descriptor, so it
-- only allows mutually exclusive access to the underlying file descriptor, so it
-- would require a change in @fs-api@. See [fs-sim#49].
handleFd :: Handle HandleIO -> IO Fd
handleFd h = withOpenHandle "submitIO" (handleRaw h) pure

{-# SPECIALISE hzipWithM ::
(VUM.Unbox b, VUM.Unbox c)
=> (a -> b -> IO c)
-> V.Vector a
-> VU.Vector b
-> IO (VU.Vector c)
#-}
-- | Heterogeneous blend of `V.zipWithM` and `VU.zipWithM`
--
-- The @vector@ package does not provide functions that take distinct vector
Expand Down
36 changes: 33 additions & 3 deletions blockio-api/src-linux/System/FS/BlockIO/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@ module System.FS.BlockIO.Internal (
ioHasBlockIO
) where

import System.FS.API (Handle (..), HasFS)
import qualified System.FS.API as FS
import System.FS.API (FsPath, Handle (..), HasFS)
import qualified System.FS.BlockIO.API as FS
import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO,
IOCtxParams)
import System.FS.IO (HandleIO)
import qualified System.FS.IO.Handle as FS
import qualified System.Posix.Fcntl as Fcntl
import qualified System.Posix.Fcntl.NoCache as Unix
import qualified System.Posix.Files as Unix
import qualified System.Posix.Unistd as Unix

#if SERIALBLOCKIO
import qualified System.FS.BlockIO.Serial as Serial
Expand All @@ -24,9 +27,28 @@ ioHasBlockIO ::
-> IOCtxParams
-> IO (HasBlockIO IO HandleIO)
#if SERIALBLOCKIO
ioHasBlockIO hfs _params = Serial.serialHasBlockIO hSetNoCache hAdvise hAllocate (FS.tryLockFileIO hfs) hfs
ioHasBlockIO hfs _params =
Serial.serialHasBlockIO
hSetNoCache
hAdvise
hAllocate
(FS.tryLockFileIO hfs)
hSynchronise
(synchroniseDirectory hfs)
(FS.createHardLinkIO hfs Unix.createLink)
hfs
#else
ioHasBlockIO hfs params = Async.asyncHasBlockIO hSetNoCache hAdvise hAllocate (FS.tryLockFileIO hfs) hfs params
ioHasBlockIO hfs params =
Async.asyncHasBlockIO
hSetNoCache
hAdvise
hAllocate
(FS.tryLockFileIO hfs)
hSynchronise
(synchroniseDirectory hfs)
(FS.createHardLinkIO hfs Unix.createLink)
hfs
params
#endif

hSetNoCache :: Handle HandleIO -> Bool -> IO ()
Expand All @@ -48,3 +70,11 @@ hAdvise h off len advice = FS.withOpenHandle "hAdvise" (handleRaw h) $ \fd ->
hAllocate :: Handle HandleIO -> FileOffset -> FileOffset -> IO ()
hAllocate h off len = FS.withOpenHandle "hAllocate" (handleRaw h) $ \fd ->
Fcntl.fileAllocate fd off len

hSynchronise :: Handle HandleIO -> IO ()
hSynchronise h = FS.withOpenHandle "hSynchronise" (handleRaw h) $ \fd ->
Unix.fileSynchronise fd

synchroniseDirectory :: HasFS IO HandleIO -> FsPath -> IO ()
synchroniseDirectory hfs path =
FS.withFile hfs path FS.ReadMode $ hSynchronise
24 changes: 22 additions & 2 deletions blockio-api/src-macos/System/FS/BlockIO/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@ module System.FS.BlockIO.Internal (
ioHasBlockIO
) where

import System.FS.API (Handle (..), HasFS)
import qualified System.FS.API as FS
import System.FS.API (FsPath, Handle (..), HasFS)
import qualified System.FS.BlockIO.API as FS
import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO,
IOCtxParams)
import qualified System.FS.BlockIO.Serial as Serial
import System.FS.IO (HandleIO)
import qualified System.FS.IO.Handle as FS
import qualified System.Posix.Fcntl.NoCache as Unix
import qualified System.Posix.Files as Unix
import qualified System.Posix.Unistd as Unix

-- | For now we use the portable serial implementation of HasBlockIO. If you
-- want to provide a proper async I/O implementation for OSX, then this is where
Expand All @@ -20,7 +23,16 @@ ioHasBlockIO ::
HasFS IO HandleIO
-> IOCtxParams
-> IO (HasBlockIO IO HandleIO)
ioHasBlockIO hfs _params = Serial.serialHasBlockIO hSetNoCache hAdvise hAllocate (FS.tryLockFileIO hfs) hfs
ioHasBlockIO hfs _params =
Serial.serialHasBlockIO
hSetNoCache
hAdvise
hAllocate
(FS.tryLockFileIO hfs)
hSynchronise
(synchroniseDirectory hfs)
(FS.createHardLinkIO hfs Unix.createLink)
hfs

hSetNoCache :: Handle HandleIO -> Bool -> IO ()
hSetNoCache h b =
Expand All @@ -34,3 +46,11 @@ hAdvise _h _off _len _advice = pure ()

hAllocate :: Handle HandleIO -> FileOffset -> FileOffset -> IO ()
hAllocate _h _off _len = pure ()

hSynchronise :: Handle HandleIO -> IO ()
hSynchronise h = FS.withOpenHandle "hSynchronise" (handleRaw h) $ \fd ->
Unix.fileSynchronise fd

synchroniseDirectory :: HasFS IO HandleIO -> FsPath -> IO ()
synchroniseDirectory hfs path =
FS.withFile hfs path FS.ReadMode $ hSynchronise
36 changes: 34 additions & 2 deletions blockio-api/src-windows/System/FS/BlockIO/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,20 @@ module System.FS.BlockIO.Internal (
ioHasBlockIO
) where

import System.FS.API (Handle (..), HasFS)
import Control.Exception (throwIO)
import Control.Monad (unless)
import qualified System.FS.API as FS
import System.FS.API (FsPath, Handle (..), HasFS)
import qualified System.FS.BlockIO.API as FS
import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO,
IOCtxParams)
import qualified System.FS.BlockIO.Serial as Serial
import System.FS.IO (HandleIO)
import qualified System.FS.IO.Handle as FS
import System.IO.Error (doesNotExistErrorType, ioeSetErrorString,
mkIOError)
import qualified System.Win32.File as Windows
import qualified System.Win32.HardLink as Windows

-- | For now we use the portable serial implementation of HasBlockIO. If you
-- want to provide a proper async I/O implementation for Windows, then this is
Expand All @@ -18,7 +26,16 @@ ioHasBlockIO ::
HasFS IO HandleIO
-> IOCtxParams
-> IO (HasBlockIO IO HandleIO)
ioHasBlockIO hfs _params = Serial.serialHasBlockIO hSetNoCache hAdvise hAllocate (FS.tryLockFileIO hfs) hfs
ioHasBlockIO hfs _params =
Serial.serialHasBlockIO
hSetNoCache
hAdvise
hAllocate
(FS.tryLockFileIO hfs)
hSynchronise
(synchroniseDirectory hfs)
(FS.createHardLinkIO hfs Windows.createHardLink)
hfs

hSetNoCache :: Handle HandleIO -> Bool -> IO ()
hSetNoCache _h _b = pure ()
Expand All @@ -28,3 +45,18 @@ hAdvise _h _off _len _advice = pure ()

hAllocate :: Handle HandleIO -> FileOffset -> FileOffset -> IO ()
hAllocate _h _off _len = pure ()

hSynchronise :: Handle HandleIO -> IO ()
hSynchronise h = FS.withOpenHandle "hAdvise" (handleRaw h) $ \fd ->
Windows.flushFileBuffers fd

synchroniseDirectory :: HasFS IO HandleIO -> FsPath -> IO ()
synchroniseDirectory hfs path = do
b <- FS.doesDirectoryExist hfs path
unless b $
throwIO $ FS.ioToFsError (FS.mkFsErrorPath hfs (FS.mkFsPath [])) ioerr
where
ioerr =
ioeSetErrorString
(mkIOError doesNotExistErrorType "synchroniseDirectory" Nothing Nothing)
("synchroniseDirectory: directory does not exist")
93 changes: 88 additions & 5 deletions blockio-api/src/System/FS/BlockIO/API.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
{-# LANGUAGE UnboxedTuples #-}

module System.FS.BlockIO.API (
-- * HasBlockIO
HasBlockIO (..)
, IOCtxParams (..)
, defaultIOCtxParams
Expand All @@ -19,15 +20,19 @@ module System.FS.BlockIO.API (
, ioopBufferOffset
, ioopByteCount
, IOResult (..)
-- * Advice
-- ** Advice
, Advice (..)
, hAdviseAll
, hDropCacheAll
-- * File locks
-- ** File locks
, GHC.LockMode (..)
, GHC.FileLockingNotSupported (..)
, LockFileHandle (..)
-- ** Storage synchronisation
, synchroniseFile
-- * Defaults for the real file system
, tryLockFileIO
, createHardLinkIO
-- * Re-exports
, ByteCount
, FileOffset
Expand All @@ -52,7 +57,8 @@ import System.FS.API (BufferOffset, FsError (..), FsPath, Handle (..),
HasFS, SomeHasFS (..))
import System.FS.IO (HandleIO)
import qualified System.IO as GHC
import System.IO.Error (ioeSetErrorString, mkIOError)
import System.IO.Error (doesNotExistErrorType, ioeSetErrorString,
mkIOError)
import System.Posix.Types (ByteCount, FileOffset)

-- | Abstract interface for submitting large batches of I\/O operations.
Expand Down Expand Up @@ -125,12 +131,42 @@ data HasBlockIO m h = HasBlockIO {
-- limited scope. That is, it has to fit the style of @withHandleToHANDLE ::
-- Handle -> (HANDLE -> IO a) -> IO a@ from the @Win32@ package.
, tryLockFile :: FsPath -> GHC.LockMode -> m (Maybe (LockFileHandle m))

-- | Synchronise file contents with the storage device.
--
-- Ensure that all change to the file handle's contents which exist only in
-- memory (as buffered system cache pages) are transfered/flushed to disk.
-- This will also update the file handle's associated metadata.
--
-- This uses different system calls on different distributions.
-- * [Linux]: @fsync(2)@
-- * [MacOS]: @fsync(2)@
-- * [Windows]: @flushFileBuffers@
, hSynchronise :: Handle h -> m ()

-- | Synchronise a directory with the storage device.
--
-- This uses different system calls on different distributions.
-- * [Linux]: @fsync(2)@
-- * [MacOS]: @fsync(2)@
-- * [Windows]: no-op
, synchroniseDirectory :: FsPath -> m ()

-- | Create a hard link for an existing file at the source path and a new
-- file at the target path.
--
-- This uses different system calls on different distributions.
-- * [Linux]: @link@
-- * [MacOS]: @link@
-- * [Windows]: @CreateHardLinkW@
, createHardLink :: FsPath -> FsPath -> m ()
}

instance NFData (HasBlockIO m h) where
rnf (HasBlockIO a b c d e f) =
rnf (HasBlockIO a b c d e f g h i) =
rwhnf a `seq` rwhnf b `seq` rnf c `seq`
rwhnf d `seq` rwhnf e `seq` rwhnf f
rwhnf d `seq` rwhnf e `seq` rwhnf f `seq`
rwhnf g `seq` rwhnf h `seq` rwhnf i

-- | Concurrency parameters for initialising a 'HasBlockIO. Can be ignored by
-- serial implementations.
Expand Down Expand Up @@ -195,6 +231,10 @@ deriving via (VU.UnboxViaPrim IOResult) instance VG.Vector VU.Vector IOResult

instance VUM.Unbox IOResult

{-------------------------------------------------------------------------------
Advice
-------------------------------------------------------------------------------}

-- | Basically "System.Posix.Fcntl.Advice" from the @unix@ package
data Advice =
AdviceNormal
Expand All @@ -214,6 +254,36 @@ hAdviseAll hbio h advice = hAdvise hbio h 0 0 advice -- len=0 implies until the
hDropCacheAll :: HasBlockIO m h -> Handle h -> m ()
hDropCacheAll hbio h = hAdviseAll hbio h AdviceDontNeed

{-------------------------------------------------------------------------------
Storage synchronisation
-------------------------------------------------------------------------------}

-- TODO: currently, we perform an explicit check to see if the file exists and
-- throw an error when it does not exist. We would prefer to be able to rely on
-- withFile to throw an error for us that we could rethrow with an upated
-- description/location. Unfortunately, we have to open te file in ReadWriteMode
-- on Windows, and withFile currently does not support such errors. The only
-- options are:
--
-- * AllowExisting: silently create a file if it does not exist
-- * MustBeNew: throw an error if the file exists
--
-- We would need to add a third option to fs-api:
--
-- * MustExist: throw an error if the file *does not* exist
synchroniseFile :: MonadThrow m => HasFS m h -> HasBlockIO m h -> FsPath -> m ()
synchroniseFile hfs hbio path = do
b <- FS.doesFileExist hfs path
if b then
FS.withFile hfs path (FS.ReadWriteMode FS.AllowExisting) $ hSynchronise hbio
else
throwIO $ FS.ioToFsError (FS.mkFsErrorPath hfs (FS.mkFsPath [])) ioerr
where
ioerr =
ioeSetErrorString
(mkIOError doesNotExistErrorType "synchroniseFile" Nothing Nothing)
("synchroniseFile: file does not exist")

{-------------------------------------------------------------------------------
File locks
-------------------------------------------------------------------------------}
Expand Down Expand Up @@ -249,3 +319,16 @@ rethrowFsErrorIO hfs fp action = do
handleError :: HasCallStack => IOError -> IO a
handleError ioErr =
throwIO $ FS.ioToFsError (FS.mkFsErrorPath hfs fp) ioErr

{-------------------------------------------------------------------------------
Hard links
-------------------------------------------------------------------------------}

createHardLinkIO ::
HasFS IO HandleIO
-> (FilePath -> FilePath -> IO ())
-> (FsPath -> FsPath -> IO ())
createHardLinkIO hfs f = \source target -> do
source' <- FS.unsafeToFilePath hfs source -- shouldn't fail because we are in IO
target' <- FS.unsafeToFilePath hfs target -- shouldn't fail because we are in IO
f source' target'
Loading

0 comments on commit a6e28c7

Please sign in to comment.