multithreaded read dir

This commit is contained in:
royalcat 2024-12-09 23:44:01 +03:00
parent 95016d54c1
commit b77ce50a7b
4 changed files with 106 additions and 46 deletions

View file

@ -41,7 +41,7 @@ var ArchiveFactories = map[string]FsFactory{
}, },
} }
type archiveLoader func(ctx context.Context, archivePath string, r ctxio.ReaderAt, size int64) (map[string]fileEntry, error) type archiveLoader func(ctx context.Context, archivePath string, r File, size int64) (map[string]fileEntry, error)
var _ Filesystem = &ArchiveFS{} var _ Filesystem = &ArchiveFS{}
@ -88,8 +88,8 @@ func (a *ArchiveFS) FsName() string {
return "archivefs" return "archivefs"
} }
func NewArchive(ctx context.Context, archivePath, name string, r ctxio.ReaderAt, size int64, loader archiveLoader) (*ArchiveFS, error) { func NewArchive(ctx context.Context, archivePath, name string, f File, size int64, loader archiveLoader) (*ArchiveFS, error) {
archiveFiles, err := loader(ctx, archivePath, r, size) archiveFiles, err := loader(ctx, archivePath, f, size)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -281,7 +281,12 @@ type archiveFileReaderFactory func(ctx context.Context) (ctxio.ReadCloser, error
var _ archiveLoader = ZipLoader var _ archiveLoader = ZipLoader
func ZipLoader(ctx context.Context, archivePath string, f ctxio.ReaderAt, size int64) (map[string]fileEntry, error) { func ZipLoader(ctx context.Context, archivePath string, f File, size int64) (map[string]fileEntry, error) {
hash, err := FileHash(ctx, f)
if err != nil {
return nil, err
}
reader := ctxio.IoReaderAt(ctx, f) reader := ctxio.IoReaderAt(ctx, f)
zr, err := zip.NewReader(reader, size) zr, err := zip.NewReader(reader, size)
if err != nil { if err != nil {
@ -314,7 +319,7 @@ func ZipLoader(ctx context.Context, archivePath string, f ctxio.ReaderAt, size i
info := zipFile.FileInfo() info := zipFile.FileInfo()
rr := newRandomReaderFromLinear(archiveFileIndex{archive: archivePath, filename: zipFile.Name}, info.Size(), af) rr := newRandomReaderFromLinear(archiveFileIndex{archiveHash: hash, filename: zipFile.Name}, info.Size(), af)
out[AbsPath(zipFile.Name)] = fileEntry{ out[AbsPath(zipFile.Name)] = fileEntry{
FileInfo: info, FileInfo: info,
@ -329,7 +334,12 @@ func ZipLoader(ctx context.Context, archivePath string, f ctxio.ReaderAt, size i
var _ archiveLoader = SevenZipLoader var _ archiveLoader = SevenZipLoader
func SevenZipLoader(ctx context.Context, archivePath string, ctxreader ctxio.ReaderAt, size int64) (map[string]fileEntry, error) { func SevenZipLoader(ctx context.Context, archivePath string, ctxreader File, size int64) (map[string]fileEntry, error) {
hash, err := FileHash(ctx, ctxreader)
if err != nil {
return nil, err
}
reader := ctxio.IoReaderAt(ctx, ctxreader) reader := ctxio.IoReaderAt(ctx, ctxreader)
r, err := sevenzip.NewReader(reader, size) r, err := sevenzip.NewReader(reader, size)
if err != nil { if err != nil {
@ -361,7 +371,7 @@ func SevenZipLoader(ctx context.Context, archivePath string, ctxreader ctxio.Rea
info := f.FileInfo() info := f.FileInfo()
rr := newRandomReaderFromLinear(archiveFileIndex{archive: archivePath, filename: f.Name}, info.Size(), af) rr := newRandomReaderFromLinear(archiveFileIndex{archiveHash: hash, filename: f.Name}, info.Size(), af)
out[AbsPath(f.Name)] = fileEntry{ out[AbsPath(f.Name)] = fileEntry{
FileInfo: f.FileInfo(), FileInfo: f.FileInfo(),
@ -376,8 +386,13 @@ func SevenZipLoader(ctx context.Context, archivePath string, ctxreader ctxio.Rea
var _ archiveLoader = RarLoader var _ archiveLoader = RarLoader
func RarLoader(ctx context.Context, archivePath string, ctxreader ctxio.ReaderAt, size int64) (map[string]fileEntry, error) { func RarLoader(ctx context.Context, archivePath string, f File, size int64) (map[string]fileEntry, error) {
reader := ioutils.WrapIoReadSeeker(ctx, ctxreader, size) hash, err := FileHash(ctx, f)
if err != nil {
return nil, err
}
reader := ioutils.WrapIoReadSeeker(ctx, f, size)
r, err := rardecode.NewReader(reader) r, err := rardecode.NewReader(reader)
if err != nil { if err != nil {
@ -396,7 +411,7 @@ func RarLoader(ctx context.Context, archivePath string, ctxreader ctxio.ReaderAt
name := header.Name name := header.Name
af := func(ctx context.Context) (ctxio.ReadCloser, error) { af := func(ctx context.Context) (ctxio.ReadCloser, error) {
reader := ioutils.WrapIoReadSeeker(ctx, ctxreader, size) reader := ioutils.WrapIoReadSeeker(ctx, f, size)
r, err := rardecode.NewReader(reader) r, err := rardecode.NewReader(reader)
if err != nil { if err != nil {
return nil, err return nil, err
@ -413,7 +428,7 @@ func RarLoader(ctx context.Context, archivePath string, ctxreader ctxio.ReaderAt
return nil, fmt.Errorf("file with name '%s' not found", name) return nil, fmt.Errorf("file with name '%s' not found", name)
} }
rr := newRandomReaderFromLinear(archiveFileIndex{archive: archivePath, filename: header.Name}, header.UnPackedSize, af) rr := newRandomReaderFromLinear(archiveFileIndex{archiveHash: hash, filename: header.Name}, header.UnPackedSize, af)
out[AbsPath(header.Name)] = fileEntry{ out[AbsPath(header.Name)] = fileEntry{
FileInfo: NewFileInfo(header.Name, header.UnPackedSize), FileInfo: NewFileInfo(header.Name, header.UnPackedSize),

View file

@ -18,8 +18,8 @@ const cacheSize = 1024 * 1024 * 1024 * 4 // 4GB of total usage
const defaultBlockCount = cacheSize / blockSize const defaultBlockCount = cacheSize / blockSize
type archiveFileIndex struct { type archiveFileIndex struct {
archive string archiveHash Hash
filename string filename string
} }
type blockIndex struct { type blockIndex struct {
@ -107,7 +107,7 @@ func (a *randomReaderFromLinear) readBlock(ctx context.Context, bI blockIndex) (
a.readerMutex.Lock() a.readerMutex.Lock()
defer a.readerMutex.Unlock() defer a.readerMutex.Unlock()
if b, ok := blockCache.Get(bI); ok { // check again, maybe another goroutine already read this block if b, ok := blockCache.Get(bI); ok && b.len != 0 { // check again, maybe another goroutine already read this block
return b, nil return b, nil
} }

View file

@ -4,10 +4,11 @@ import (
"archive/zip" "archive/zip"
"bytes" "bytes"
"context" "context"
"io"
"io/fs"
"testing" "testing"
"git.kmsign.ru/royalcat/tstor/src/vfs" "git.kmsign.ru/royalcat/tstor/src/vfs"
"github.com/royalcat/ctxio"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
@ -62,24 +63,24 @@ func TestZipFilesystem(t *testing.T) {
f, err := zfs.Open(ctx, "/path/to/test/file/1.txt") f, err := zfs.Open(ctx, "/path/to/test/file/1.txt")
require.NoError(err) require.NoError(err)
n, err := f.Read(ctx, out) n, err := f.Read(ctx, out)
require.NoError(err) require.ErrorIs(err, io.EOF)
require.Equal(5, n) require.Equal(5, n)
require.Equal([]byte("Hello"), out) require.Equal([]byte("Hello"), out)
outSpace := make([]byte, 1) outSpace := make([]byte, 1)
n, err = f.Read(ctx, outSpace) n, err = f.Read(ctx, outSpace)
require.NoError(err) require.ErrorIs(err, io.EOF)
require.Equal(1, n) require.Equal(1, n)
require.Equal([]byte(" "), outSpace) require.Equal([]byte(" "), outSpace)
n, err = f.Read(ctx, out) n, err = f.Read(ctx, out)
require.NoError(err) require.ErrorIs(err, io.EOF)
require.Equal(5, n) require.Equal(5, n)
require.Equal([]byte("World"), out) require.Equal([]byte("World"), out)
} }
func createTestZip(require *require.Assertions) (ctxio.ReaderAt, int64) { func createTestZip(require *require.Assertions) (vfs.File, int64) {
buf := bytes.NewBuffer([]byte{}) buf := bytes.NewBuffer([]byte{})
zWriter := zip.NewWriter(buf) zWriter := zip.NewWriter(buf)
@ -95,17 +96,59 @@ func createTestZip(require *require.Assertions) (ctxio.ReaderAt, int64) {
return newCBR(buf.Bytes()), int64(buf.Len()) return newCBR(buf.Bytes()), int64(buf.Len())
} }
type closeableByteReader struct {
data *bytes.Reader
}
func newCBR(b []byte) *closeableByteReader { func newCBR(b []byte) *closeableByteReader {
return &closeableByteReader{ return &closeableByteReader{
data: bytes.NewReader(b), data: bytes.NewReader(b),
} }
} }
var _ vfs.File = &closeableByteReader{}
type closeableByteReader struct {
data *bytes.Reader
}
// ReadAt implements ctxio.ReaderAt. // ReadAt implements ctxio.ReaderAt.
func (c *closeableByteReader) ReadAt(ctx context.Context, p []byte, off int64) (n int, err error) { func (c *closeableByteReader) ReadAt(ctx context.Context, p []byte, off int64) (n int, err error) {
return c.data.ReadAt(p, off) return c.data.ReadAt(p, off)
} }
// Close implements vfs.File.
//
// The reader is backed entirely by an in-memory bytes.Reader, so there is no
// resource to release. Close is therefore a no-op that always returns nil and
// may be called any number of times. Returning nil instead of panicking keeps
// a test alive when the code under test closes the file it was handed.
func (c *closeableByteReader) Close(ctx context.Context) error {
	return nil
}
// Info implements vfs.File.
//
// Not supported by this test helper: the method exists only to satisfy the
// interface and panics with "unimplemented" when called.
func (c *closeableByteReader) Info() (fs.FileInfo, error) {
panic("unimplemented")
}
// IsDir implements vfs.File.
//
// Not supported by this test helper: the method exists only to satisfy the
// interface and panics with "unimplemented" when called.
func (c *closeableByteReader) IsDir() bool {
panic("unimplemented")
}
// Name implements vfs.File.
//
// Not supported by this test helper: the method exists only to satisfy the
// interface and panics with "unimplemented" when called.
func (c *closeableByteReader) Name() string {
panic("unimplemented")
}
// Read implements vfs.File.
//
// The context is not consulted; the call is forwarded to the wrapped
// bytes.Reader, which copies into p from its current position and advances it.
func (c *closeableByteReader) Read(ctx context.Context, p []byte) (n int, err error) {
	n, err = c.data.Read(p)
	return n, err
}
// Seek implements vfs.File.
//
// The request is passed unchanged to the wrapped bytes.Reader; the resulting
// absolute position and any error it reports are returned as-is.
func (c *closeableByteReader) Seek(offset int64, whence int) (int64, error) {
	pos, err := c.data.Seek(offset, whence)
	return pos, err
}
// Size implements vfs.File.
//
// It reports the value of Size on the wrapped bytes.Reader, i.e. the full
// length of the data the reader was created with.
func (c *closeableByteReader) Size() int64 {
	total := c.data.Size()
	return total
}
// Type implements vfs.File.
//
// Not supported by this test helper: the method exists only to satisfy the
// interface and panics with "unimplemented" when called.
func (c *closeableByteReader) Type() fs.FileMode {
panic("unimplemented")
}

View file

@ -14,6 +14,7 @@ import (
"time" "time"
"git.kmsign.ru/royalcat/tstor/pkg/rlog" "git.kmsign.ru/royalcat/tstor/pkg/rlog"
"github.com/sourcegraph/conc/iter"
"go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace" "go.opentelemetry.io/otel/trace"
"golang.org/x/exp/maps" "golang.org/x/exp/maps"
@ -111,8 +112,8 @@ func (r *ResolverFS) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, e
if err != nil { if err != nil {
return nil, err return nil, err
} }
out := make([]fs.DirEntry, 0, len(entries)) out, err := iter.MapErr(entries, func(pe *fs.DirEntry) (fs.DirEntry, error) {
for _, e := range entries { e := *pe
if r.resolver.IsNestedFs(e.Name()) { if r.resolver.IsNestedFs(e.Name()) {
filepath := path.Join("/", name, e.Name()) filepath := path.Join("/", name, e.Name())
file, err := r.rootFS.Open(ctx, filepath) file, err := r.rootFS.Open(ctx, filepath)
@ -125,16 +126,22 @@ func (r *ResolverFS) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, e
} }
if err != nil { if err != nil {
log.Error(ctx, "error creating nested fs", rlog.Error(err)) log.Error(ctx, "error creating nested fs", rlog.Error(err))
out = append(out, e) return nil, fmt.Errorf("error creating nested fs: %w", err)
continue
} }
return nestedfs, nil
out = append(out, nestedfs)
} else { } else {
out = append(out, e) return e, nil
} }
})
if err != nil {
log.Error(ctx, "error mapping entries", rlog.Error(err))
err = nil
} }
return out, nil
out = slices.DeleteFunc(out, func(e fs.DirEntry) bool { return e == nil })
return out, err
} }
// Stat implements Filesystem. // Stat implements Filesystem.
@ -228,14 +235,14 @@ type FsFactory func(ctx context.Context, sourcePath string, f File) (Filesystem,
func NewResolver(factories map[string]FsFactory) *Resolver { func NewResolver(factories map[string]FsFactory) *Resolver {
return &Resolver{ return &Resolver{
factories: factories, factories: factories,
fsmap: map[Hash]Filesystem{}, fsmap: map[string]Filesystem{},
} }
} }
type Resolver struct { type Resolver struct {
m sync.Mutex m sync.Mutex
factories map[string]FsFactory factories map[string]FsFactory
fsmap map[Hash]Filesystem // filesystem cache fsmap map[string]Filesystem // filesystem cache
// TODO: add fsmap clean // TODO: add fsmap clean
} }
@ -255,15 +262,10 @@ func (r *Resolver) nestedFs(ctx context.Context, fsPath string, file File) (File
return nil, file.Close(ctx) return nil, file.Close(ctx)
} }
fileHash, err := FileHash(ctx, file)
if err != nil {
return nil, fmt.Errorf("error calculating file hash: %w", err)
}
r.m.Lock() r.m.Lock()
defer r.m.Unlock() defer r.m.Unlock()
if nestedFs, ok := r.fsmap[fileHash]; ok { if nestedFs, ok := r.fsmap[fsPath]; ok {
return nestedFs, file.Close(ctx) return nestedFs, file.Close(ctx)
} }
@ -276,7 +278,7 @@ func (r *Resolver) nestedFs(ctx context.Context, fsPath string, file File) (File
if err != nil { if err != nil {
return nil, fmt.Errorf("error calling nest factory: %s with error: %w", fsPath, err) return nil, fmt.Errorf("error calling nest factory: %s with error: %w", fsPath, err)
} }
r.fsmap[fileHash] = nestedFs r.fsmap[fsPath] = nestedFs
return nestedFs, nil return nestedFs, nil
@ -319,10 +321,10 @@ PARTS_LOOP:
if err != nil { if err != nil {
return "", nil, "", fmt.Errorf("error opening filesystem file: %s with error: %w", fsPath, err) return "", nil, "", fmt.Errorf("error opening filesystem file: %s with error: %w", fsPath, err)
} }
fileHash, err := FileHash(ctx, file) // fileHash, err := FileHash(ctx, file)
if err != nil { // if err != nil {
return "", nil, "", fmt.Errorf("error calculating file hash: %w", err) // return "", nil, "", fmt.Errorf("error calculating file hash: %w", err)
} // }
err = file.Close(ctx) err = file.Close(ctx)
if err != nil { if err != nil {
return "", nil, "", fmt.Errorf("error closing file: %w", err) return "", nil, "", fmt.Errorf("error closing file: %w", err)
@ -335,7 +337,7 @@ PARTS_LOOP:
r.m.Lock() r.m.Lock()
defer r.m.Unlock() defer r.m.Unlock()
if nestedFs, ok := r.fsmap[fileHash]; ok { if nestedFs, ok := r.fsmap[fsPath]; ok {
span.AddEvent("fs loaded from cache", trace.WithAttributes(attribute.String("nestedFs", reflect.TypeOf(nestedFs).Name()))) span.AddEvent("fs loaded from cache", trace.WithAttributes(attribute.String("nestedFs", reflect.TypeOf(nestedFs).Name())))
return fsPath, nestedFs, nestedFsPath, nil return fsPath, nestedFs, nestedFsPath, nil
} else { } else {
@ -352,7 +354,7 @@ PARTS_LOOP:
if err != nil { if err != nil {
return "", nil, "", fmt.Errorf("error creating filesystem from file: %s with error: %w", fsPath, err) return "", nil, "", fmt.Errorf("error creating filesystem from file: %s with error: %w", fsPath, err)
} }
r.fsmap[fileHash] = nestedFs r.fsmap[fsPath] = nestedFs
span.AddEvent("fs created", trace.WithAttributes(attribute.String("nestedFs", reflect.TypeOf(nestedFs).Name()))) span.AddEvent("fs created", trace.WithAttributes(attribute.String("nestedFs", reflect.TypeOf(nestedFs).Name())))