2024-05-19 21:24:09 +00:00
|
|
|
package torrent
|
2024-01-28 20:22:49 +00:00
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2024-03-28 13:09:42 +00:00
|
|
|
"crypto/sha1"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"io/fs"
|
2024-01-28 20:22:49 +00:00
|
|
|
"log/slog"
|
|
|
|
"os"
|
|
|
|
"path"
|
|
|
|
"path/filepath"
|
|
|
|
"slices"
|
|
|
|
|
|
|
|
"github.com/anacrolix/torrent"
|
|
|
|
"github.com/anacrolix/torrent/metainfo"
|
|
|
|
"github.com/anacrolix/torrent/storage"
|
2024-03-28 13:09:42 +00:00
|
|
|
"github.com/dustin/go-humanize"
|
|
|
|
"go.opentelemetry.io/otel/attribute"
|
|
|
|
"go.opentelemetry.io/otel/trace"
|
|
|
|
"golang.org/x/exp/maps"
|
|
|
|
"golang.org/x/sys/unix"
|
2024-01-28 20:22:49 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// NewFileStorage creates a new ClientImplCloser that stores files using the OS native filesystem.
|
2024-03-28 13:09:42 +00:00
|
|
|
func NewFileStorage(baseDir string, pc storage.PieceCompletion) *DataStorage {
|
|
|
|
return &DataStorage{
|
2024-01-28 20:22:49 +00:00
|
|
|
ClientImplCloser: storage.NewFileOpts(storage.NewFileClientOpts{
|
|
|
|
ClientBaseDir: baseDir,
|
|
|
|
PieceCompletion: pc,
|
|
|
|
TorrentDirMaker: torrentDir,
|
|
|
|
FilePathMaker: filePath,
|
|
|
|
}),
|
2024-03-28 13:09:42 +00:00
|
|
|
baseDir: baseDir,
|
2024-01-28 20:22:49 +00:00
|
|
|
pieceCompletion: pc,
|
|
|
|
log: slog.With("component", "torrent-client"),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// File-based storage for torrents, that isn't yet bound to a particular torrent.
type DataStorage struct {
	// baseDir is the root directory under which all torrent data lives.
	baseDir string
	// ClientImplCloser is the underlying anacrolix file-storage client;
	// its methods are promoted onto DataStorage.
	storage.ClientImplCloser
	// pieceCompletion records which pieces have been verified complete.
	pieceCompletion storage.PieceCompletion
	// log is the structured logger shared by all DataStorage operations.
	log *slog.Logger
}
|
|
|
|
|
2024-03-28 13:09:42 +00:00
|
|
|
func (me *DataStorage) Close() error {
|
2024-01-28 20:22:49 +00:00
|
|
|
return me.pieceCompletion.Close()
|
|
|
|
}
|
|
|
|
|
|
|
|
func torrentDir(baseDir string, info *metainfo.Info, infoHash metainfo.Hash) string {
|
2024-02-22 22:54:56 +00:00
|
|
|
dirName := info.Name
|
|
|
|
if dirName == "" {
|
|
|
|
dirName = infoHash.HexString()
|
|
|
|
}
|
|
|
|
|
|
|
|
return filepath.Join(baseDir, dirName)
|
2024-01-28 20:22:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func filePath(opts storage.FilePathMakerOpts) string {
|
|
|
|
return filepath.Join(opts.File.Path...)
|
|
|
|
}
|
|
|
|
|
2024-03-28 13:09:42 +00:00
|
|
|
func (fs *DataStorage) filePath(info *metainfo.Info, infoHash metainfo.Hash, fileInfo *metainfo.FileInfo) string {
|
2024-01-28 20:22:49 +00:00
|
|
|
return filepath.Join(torrentDir(fs.baseDir, info, infoHash), filePath(storage.FilePathMakerOpts{
|
|
|
|
Info: info,
|
|
|
|
File: fileInfo,
|
|
|
|
}))
|
|
|
|
}
|
|
|
|
|
2024-03-28 13:09:42 +00:00
|
|
|
func (fs *DataStorage) DeleteFile(file *torrent.File) error {
|
2024-01-28 20:22:49 +00:00
|
|
|
info := file.Torrent().Info()
|
|
|
|
infoHash := file.Torrent().InfoHash()
|
|
|
|
torrentDir := torrentDir(fs.baseDir, info, infoHash)
|
|
|
|
fileInfo := file.FileInfo()
|
|
|
|
relFilePath := filePath(storage.FilePathMakerOpts{
|
|
|
|
Info: info,
|
|
|
|
File: &fileInfo,
|
|
|
|
})
|
|
|
|
filePath := path.Join(torrentDir, relFilePath)
|
|
|
|
for i := file.BeginPieceIndex(); i < file.EndPieceIndex(); i++ {
|
|
|
|
pk := metainfo.PieceKey{InfoHash: infoHash, Index: i}
|
|
|
|
err := fs.pieceCompletion.Set(pk, false)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return os.Remove(filePath)
|
|
|
|
}
|
|
|
|
|
2024-05-19 21:24:09 +00:00
|
|
|
func (fs *DataStorage) CleanupDirs(ctx context.Context, expected []*Controller, dryRun bool) ([]string, error) {
|
2024-01-28 20:22:49 +00:00
|
|
|
log := fs.log.With("function", "CleanupDirs", "expectedTorrents", len(expected), "dryRun", dryRun)
|
|
|
|
|
|
|
|
expectedEntries := []string{}
|
|
|
|
for _, e := range expected {
|
|
|
|
expectedEntries = append(expectedEntries, e.Torrent().Name())
|
|
|
|
}
|
|
|
|
|
|
|
|
entries, err := os.ReadDir(fs.baseDir)
|
|
|
|
if err != nil {
|
2024-03-28 13:09:42 +00:00
|
|
|
return nil, err
|
2024-01-28 20:22:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
toDelete := []string{}
|
|
|
|
for _, v := range entries {
|
|
|
|
if !slices.Contains(expectedEntries, v.Name()) {
|
|
|
|
toDelete = append(toDelete, v.Name())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if ctx.Err() != nil {
|
2024-03-28 13:09:42 +00:00
|
|
|
return nil, ctx.Err()
|
2024-01-28 20:22:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
log.Info("deleting trash data", "dirsCount", len(toDelete))
|
|
|
|
if !dryRun {
|
|
|
|
for i, name := range toDelete {
|
|
|
|
p := path.Join(fs.baseDir, name)
|
|
|
|
log.Warn("deleting trash data", "path", p)
|
|
|
|
err := os.RemoveAll(p)
|
|
|
|
if err != nil {
|
2024-03-28 13:09:42 +00:00
|
|
|
return toDelete[:i], err
|
2024-01-28 20:22:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-03-28 13:09:42 +00:00
|
|
|
return toDelete, nil
|
2024-01-28 20:22:49 +00:00
|
|
|
}
|
|
|
|
|
2024-05-19 21:24:09 +00:00
|
|
|
// CleanupFiles walks baseDir and removes every regular file that does not
// belong to one of the expected torrents. With dryRun set nothing is removed.
// It returns the file paths that were (or would be) deleted.
func (s *DataStorage) CleanupFiles(ctx context.Context, expected []*Controller, dryRun bool) ([]string, error) {
	log := s.log.With("function", "CleanupFiles", "expectedTorrents", len(expected), "dryRun", dryRun)

	// Full on-disk paths of every file owned by the expected torrents.
	expectedEntries := []string{}
	{
		for _, e := range expected {
			files, err := e.Files(ctx)
			if err != nil {
				return nil, err
			}

			for _, f := range files {
				expectedEntries = append(expectedEntries, s.filePath(e.Torrent().Info(), e.Torrent().InfoHash(), ptr(f.FileInfo())))
			}
		}
	}

	// Every regular file currently present under baseDir (directories skipped).
	entries := []string{}
	err := filepath.WalkDir(s.baseDir,
		func(path string, info fs.DirEntry, err error) error {
			if err != nil {
				return err
			}
			// Abort the walk promptly on cancellation.
			if ctx.Err() != nil {
				return ctx.Err()
			}

			if info.IsDir() {
				return nil
			}
			entries = append(entries, path)
			return nil
		})
	if err != nil {
		return nil, err
	}

	// Anything on disk that no torrent claims is trash.
	toDelete := []string{}
	for _, v := range entries {
		if !slices.Contains(expectedEntries, v) {
			toDelete = append(toDelete, v)
		}
	}

	if ctx.Err() != nil {
		return toDelete, ctx.Err()
	}

	log.Info("deleting trash data", "filesCount", len(toDelete))
	if !dryRun {
		for i, p := range toDelete {
			s.log.Warn("deleting trash data", "path", p)
			err := os.Remove(p)
			if err != nil {
				// NOTE(review): this returns the NOT-yet-deleted tail, while
				// CleanupDirs returns the already-deleted prefix on the same
				// kind of failure — confirm which contract callers expect.
				return toDelete[i:], err
			}
		}
	}
	return toDelete, nil
}
|
|
|
|
|
|
|
|
func (s *DataStorage) iterFiles(ctx context.Context, iter func(ctx context.Context, path string, entry fs.FileInfo) error) error {
|
|
|
|
return filepath.Walk(s.baseDir,
|
|
|
|
func(path string, info fs.FileInfo, err error) error {
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if ctx.Err() != nil {
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
|
|
|
|
|
|
|
if info.IsDir() {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return iter(ctx, path, info)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *DataStorage) Dedupe(ctx context.Context) (uint64, error) {
|
|
|
|
ctx, span := tracer.Start(ctx, fmt.Sprintf("Dedupe"))
|
|
|
|
defer span.End()
|
|
|
|
|
2024-04-17 08:36:14 +00:00
|
|
|
log := s.log
|
2024-03-28 13:09:42 +00:00
|
|
|
|
|
|
|
sizeMap := map[int64][]string{}
|
|
|
|
err := s.iterFiles(ctx, func(ctx context.Context, path string, info fs.FileInfo) error {
|
|
|
|
size := info.Size()
|
|
|
|
sizeMap[size] = append(sizeMap[size], path)
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
maps.DeleteFunc(sizeMap, func(k int64, v []string) bool {
|
|
|
|
return len(v) <= 1
|
|
|
|
})
|
|
|
|
|
|
|
|
span.AddEvent("collected files with same size", trace.WithAttributes(
|
|
|
|
attribute.Int("count", len(sizeMap)),
|
|
|
|
))
|
|
|
|
|
|
|
|
var deduped uint64 = 0
|
|
|
|
|
|
|
|
i := 0
|
|
|
|
for _, paths := range sizeMap {
|
|
|
|
if i%100 == 0 {
|
|
|
|
log.Info("deduping in progress", "current", i, "total", len(sizeMap))
|
|
|
|
}
|
|
|
|
i++
|
|
|
|
|
|
|
|
if ctx.Err() != nil {
|
|
|
|
return deduped, ctx.Err()
|
|
|
|
}
|
|
|
|
|
|
|
|
slices.Sort(paths)
|
|
|
|
paths = slices.Compact(paths)
|
|
|
|
if len(paths) <= 1 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
paths, err = applyErr(paths, filepath.Abs)
|
|
|
|
if err != nil {
|
|
|
|
return deduped, err
|
|
|
|
}
|
|
|
|
|
|
|
|
dedupedGroup, err := s.dedupeFiles(ctx, paths)
|
|
|
|
if err != nil {
|
|
|
|
log.Error("Error applying dedupe", "files", paths, "error", err.Error())
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if dedupedGroup > 0 {
|
|
|
|
deduped += dedupedGroup
|
|
|
|
log.Info("deduped file group",
|
|
|
|
slog.String("files", fmt.Sprint(paths)),
|
|
|
|
slog.String("deduped", humanize.Bytes(dedupedGroup)),
|
|
|
|
slog.String("deduped_total", humanize.Bytes(deduped)),
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return deduped, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// applyErr maps apply over in, returning the transformed slice. It stops at
// the first error, returning the partial results gathered so far along with
// that error.
func applyErr[E, O any](in []E, apply func(E) (O, error)) ([]O, error) {
	out := make([]O, 0, len(in))
	for _, v := range in {
		mapped, err := apply(v)
		if err != nil {
			return out, err
		}
		out = append(out, mapped)
	}
	return out, nil
}
|
|
|
|
|
|
|
|
// const blockSize uint64 = 4096
|
|
|
|
|
|
|
|
func (s *DataStorage) dedupeFiles(ctx context.Context, paths []string) (deduped uint64, err error) {
|
|
|
|
ctx, span := tracer.Start(ctx, fmt.Sprintf("dedupeFiles"), trace.WithAttributes(
|
|
|
|
attribute.StringSlice("files", paths),
|
|
|
|
))
|
|
|
|
defer func() {
|
|
|
|
span.SetAttributes(attribute.Int64("deduped", int64(deduped)))
|
|
|
|
if err != nil {
|
|
|
|
span.RecordError(err)
|
|
|
|
}
|
|
|
|
span.End()
|
|
|
|
}()
|
|
|
|
|
2024-04-17 08:36:14 +00:00
|
|
|
log := s.log
|
2024-03-28 13:09:42 +00:00
|
|
|
|
|
|
|
srcF, err := os.Open(paths[0])
|
|
|
|
if err != nil {
|
|
|
|
return deduped, err
|
|
|
|
}
|
|
|
|
defer srcF.Close()
|
|
|
|
srcStat, err := srcF.Stat()
|
|
|
|
if err != nil {
|
|
|
|
return deduped, err
|
|
|
|
}
|
|
|
|
|
|
|
|
srcFd := int(srcF.Fd())
|
|
|
|
srcSize := srcStat.Size()
|
|
|
|
|
|
|
|
fsStat := unix.Statfs_t{}
|
|
|
|
err = unix.Fstatfs(srcFd, &fsStat)
|
|
|
|
if err != nil {
|
|
|
|
span.RecordError(err)
|
|
|
|
return deduped, err
|
|
|
|
}
|
|
|
|
|
|
|
|
srcHash, err := filehash(srcF)
|
|
|
|
if err != nil {
|
|
|
|
return deduped, err
|
|
|
|
}
|
|
|
|
|
2024-03-29 06:53:52 +00:00
|
|
|
if int64(fsStat.Bsize) > srcSize { // for btrfs it means file in residing in not deduplicatable metadata
|
2024-03-28 13:09:42 +00:00
|
|
|
return deduped, nil
|
|
|
|
}
|
|
|
|
|
2024-03-29 06:53:52 +00:00
|
|
|
blockSize := uint64((srcSize % int64(fsStat.Bsize)) * int64(fsStat.Bsize))
|
2024-03-28 13:09:42 +00:00
|
|
|
|
|
|
|
span.SetAttributes(attribute.Int64("blocksize", int64(blockSize)))
|
|
|
|
|
|
|
|
rng := unix.FileDedupeRange{
|
|
|
|
Src_offset: 0,
|
|
|
|
Src_length: blockSize,
|
|
|
|
Info: []unix.FileDedupeRangeInfo{},
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, dst := range paths[1:] {
|
|
|
|
if ctx.Err() != nil {
|
|
|
|
return deduped, ctx.Err()
|
|
|
|
}
|
|
|
|
|
|
|
|
destF, err := os.OpenFile(dst, os.O_RDWR, os.ModePerm)
|
|
|
|
if err != nil {
|
|
|
|
return deduped, err
|
|
|
|
}
|
|
|
|
defer destF.Close()
|
|
|
|
|
|
|
|
dstHash, err := filehash(destF)
|
|
|
|
if err != nil {
|
|
|
|
return deduped, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if srcHash != dstHash {
|
|
|
|
destF.Close()
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
rng.Info = append(rng.Info, unix.FileDedupeRangeInfo{
|
|
|
|
Dest_fd: int64(destF.Fd()),
|
|
|
|
Dest_offset: 0,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(rng.Info) == 0 {
|
|
|
|
return deduped, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Info("found same files, deduping", "files", paths, "size", humanize.Bytes(uint64(srcStat.Size())))
|
|
|
|
|
|
|
|
if ctx.Err() != nil {
|
|
|
|
return deduped, ctx.Err()
|
|
|
|
}
|
|
|
|
|
|
|
|
rng.Src_offset = 0
|
|
|
|
for i := range rng.Info {
|
|
|
|
rng.Info[i].Dest_offset = 0
|
|
|
|
}
|
|
|
|
|
|
|
|
err = unix.IoctlFileDedupeRange(srcFd, &rng)
|
|
|
|
if err != nil {
|
|
|
|
return deduped, err
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := range rng.Info {
|
|
|
|
deduped += rng.Info[i].Bytes_deduped
|
|
|
|
|
|
|
|
rng.Info[i].Status = 0
|
|
|
|
rng.Info[i].Bytes_deduped = 0
|
|
|
|
}
|
|
|
|
|
|
|
|
return deduped, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// compareBlockSize is how many leading bytes of a file are hashed when
// comparing candidate duplicates.
const compareBlockSize = 1024 * 128

// filehash returns the SHA-1 of up to the first compareBlockSize bytes of r.
// Callers compare the results for two same-sized files to decide whether
// they are duplicates.
func filehash(r io.Reader) ([20]byte, error) {
	buf := make([]byte, compareBlockSize)
	// BUG FIX: a single r.Read call may legally return fewer bytes than are
	// available, which made identical content hash differently. io.ReadFull
	// keeps reading until the buffer is full or EOF.
	n, err := io.ReadFull(r, buf)
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return [20]byte{}, err
	}
	// Hash only the bytes actually read, not the zero-padded remainder.
	return sha1.Sum(buf[:n]), nil
}
|
|
|
|
|
|
|
|
// ptr returns a pointer to a copy of v; useful for taking the address of
// literals and function results.
func ptr[D any](v D) *D {
	out := v
	return &out
}
|