tstor/pkg/cowutils/dedupe.go

89 lines
1.7 KiB
Go
Raw Normal View History

2024-06-14 22:14:44 +00:00
package cowutils
import (
"context"
"os"
"golang.org/x/sys/unix"
)
// DedupeFiles asks the filesystem to share the data of paths[0] with every
// other file in paths, using the FIDEDUPERANGE ioctl.
//
// paths[0] is the source and is opened read-only; each remaining entry is a
// destination and is opened read-write. The submitted range starts at offset 0
// in every file and covers the source size rounded down to a whole number of
// filesystem blocks (as reported by Fstatfs on the source).
//
// It returns the sum of Bytes_deduped reported for all destinations. The
// per-destination Status field is not inspected, so only a failure of the
// ioctl call itself (or of opening/stat-ing a file, or ctx cancellation) is
// reported as an error.
//
// An empty paths slice, a source smaller than one block, or a paths slice with
// no destinations results in (0, nil).
func DedupeFiles(ctx context.Context, paths []string) (deduped uint64, err error) {
	if len(paths) == 0 {
		return 0, nil
	}

	srcF, err := os.Open(paths[0])
	if err != nil {
		return 0, err
	}
	defer srcF.Close()

	srcStat, err := srcF.Stat()
	if err != nil {
		return 0, err
	}

	srcFd := int(srcF.Fd())
	srcSize := srcStat.Size()

	var fsStat unix.Statfs_t
	if err := unix.Fstatfs(srcFd, &fsStat); err != nil {
		return 0, err
	}

	bsize := int64(fsStat.Bsize)
	// A source smaller than one block cannot be deduplicated (for btrfs it
	// means the file resides in metadata). The bsize guard also protects the
	// division below against a zero block size.
	if bsize <= 0 || srcSize < bsize {
		return 0, nil
	}

	// Round the source size down to a multiple of the block size.
	length := uint64(srcSize / bsize * bsize)

	fdr := unix.FileDedupeRange{
		Src_offset: 0,
		Src_length: length,
		Info:       make([]unix.FileDedupeRangeInfo, 0, len(paths)-1),
	}

	for _, dst := range paths[1:] {
		if err := ctx.Err(); err != nil {
			return 0, err
		}

		destF, err := os.OpenFile(dst, os.O_RDWR, os.ModePerm)
		if err != nil {
			return 0, err
		}
		// defer in a loop is intended: the file must be closed only at the end
		// of the function, and, most importantly, this keeps the GC from
		// closing the descriptor while the dedupe is in progress.
		defer destF.Close()

		fdr.Info = append(fdr.Info, unix.FileDedupeRangeInfo{
			Dest_fd:     int64(destF.Fd()),
			Dest_offset: 0,
		})
	}

	if len(fdr.Info) == 0 {
		return 0, nil
	}

	if err := ctx.Err(); err != nil {
		return 0, err
	}

	if err := unix.IoctlFileDedupeRange(srcFd, &fdr); err != nil {
		return 0, err
	}

	for i := range fdr.Info {
		deduped += fdr.Info[i].Bytes_deduped
	}

	return deduped, nil
}