This commit is contained in:
royalcat 2024-05-13 19:56:20 +03:00
parent 0d7aac068c
commit 974814c281
20 changed files with 1532 additions and 716 deletions

465
pkg/ytdlp/download.go Normal file
View file

@ -0,0 +1,465 @@
package ytdlp
import (
"bytes"
"context"
"encoding/json"
"fmt"
"os/exec"
"path"
"strings"
"github.com/royalcat/ctxprogress"
"golang.org/x/sync/errgroup"
)
// DownloadStatus is the value of the "status" field in a yt-dlp progress
// report.
type DownloadStatus string

// Status values declared for DownloadProgress.Status.
const (
	StatusDownloading = DownloadStatus("downloading")
	StatusFinished    = DownloadStatus("finished")
	StatusErrored     = DownloadStatus("error")
)

// DownloadProgress is a single progress report, decoded from JSON using the
// field tags below.
type DownloadProgress struct {
	Status             DownloadStatus `json:"status"`
	Filename           string         `json:"filename"`
	TmpFilename        string         `json:"tmpfilename"`
	DownloadedBytes    int64          `json:"downloaded_bytes"`
	TotalBytes         int64          `json:"total_bytes"`
	TotalBytesEstimate float64        `json:"total_bytes_estimate"`
	Elapsed            float64        `json:"elapsed"`
	ETA                float64        `json:"eta"`
	Speed              float64        `json:"speed"`
	FragmentIndex      int64          `json:"fragment_index"`
	FragmentCount      int64          `json:"fragment_count"`
}

// Progress implements ctxprogress.Progress.
//
// It returns (current, total) from the first pair that is usable, in this
// order of preference:
//
//  1. downloaded bytes / exact total bytes
//  2. downloaded bytes / estimated total bytes (truncated to int)
//  3. fragment index / fragment count
//
// A byte pair is usable when the downloaded counter is not -1 and the total
// is neither -1 nor 0. The fragment pair is returned as-is, whatever it holds.
func (d DownloadProgress) Progress() (int, int) {
	haveDownloaded := d.DownloadedBytes != -1

	switch {
	case haveDownloaded && d.TotalBytes != -1 && d.TotalBytes != 0:
		return int(d.DownloadedBytes), int(d.TotalBytes)
	case haveDownloaded && d.TotalBytesEstimate != -1 && d.TotalBytesEstimate != 0:
		return int(d.DownloadedBytes), int(d.TotalBytesEstimate)
	default:
		return int(d.FragmentIndex), int(d.FragmentCount)
	}
}
// rawProgressTemplate is the readable, multi-line form of the value passed to
// yt-dlp's --progress-template flag by Download. After the "download:" prefix
// it consists of a literal '%' followed by a JSON object; Download recognises
// progress lines by that leading '%' and unmarshals the remainder into a
// DownloadProgress.
// NOTE(review): the "|-1" / "|0" parts look like yt-dlp default values that
// keep the object valid JSON when a field is unavailable — confirm against the
// yt-dlp output template documentation.
const rawProgressTemplate = `download:
%{
"status":"%(progress.status)s",
"eta":%(progress.eta|-1)s,
"speed":%(progress.speed|0)s,
"downloaded_bytes":%(progress.downloaded_bytes|-1)s,
"total_bytes": %(progress.total_bytes|-1)s,
"total_bytes_estimate": %(progress.total_bytes_estimate|-1)s,
"fragment_index":%(progress.fragment_index|-1)s,
"fragment_count":%(progress.fragment_count|-1)s
}`
// progressTemplate is rawProgressTemplate with every newline, tab and space
// removed, so the template is a single line.
var progressTemplate = strings.NewReplacer("\n", "", "\t", "", " ", "").Replace(rawProgressTemplate)
// Download runs the yt-dlp binary for url with an output template of
// "<dir>/%(title)s.%(ext)s" and blocks until the process has exited.
//
// While the process runs, every stdout line that starts with '%' is decoded
// as a DownloadProgress and published through ctxprogress.Set. Lines that do
// not start with '%' are ignored, as are progress lines that fail to decode.
//
// Errors:
//   - if ctx was cancelled or timed out, ctx.Err() is returned;
//   - if yt-dlp exited unsuccessfully, the returned error wraps the
//     *exec.ExitError and includes what yt-dlp wrote to stderr;
//   - any other failure (for example the binary could not be started) is
//     returned unchanged.
func (c *Client) Download(ctx context.Context, url string, dir string) error {
	args := []string{
		"--no-simulate", "-j",
		"--progress", "--newline", "--progress-template", progressTemplate,
		"-o", path.Join(dir, "%(title)s.%(ext)s"),
		url,
	}

	// The caller's ctx is kept separate from the group's context: the latter is
	// always cancelled once Wait returns, so only the former can tell us
	// whether the caller gave up.
	group, runCtx := errgroup.WithContext(ctx)
	w, lines, err := lineReader(group)
	if err != nil {
		return err
	}

	var stderr bytes.Buffer
	cmd := exec.CommandContext(runCtx, c.binary, args...)
	cmd.Stdout = w
	cmd.Stderr = &stderr

	group.Go(func() error {
		// Closing the writer is what ends the line channel below, so it must
		// happen on every exit path of this goroutine.
		defer w.Close()
		return cmd.Run()
	})

	for line := range lines {
		payload, isProgress := strings.CutPrefix(line, "%")
		if !isProgress {
			continue
		}
		var p DownloadProgress
		if err := json.Unmarshal([]byte(payload), &p); err != nil {
			// Progress reporting is best effort: a malformed report must not
			// abort an otherwise healthy download.
			continue
		}
		ctxprogress.Set(runCtx, p)
	}

	if err := group.Wait(); err != nil {
		// A killed process also surfaces as an ExitError; report the
		// cancellation itself rather than an empty "yt-dlp error".
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}
		if _, ok := err.(*exec.ExitError); ok {
			return fmt.Errorf("yt-dlp error: %w: %s", err, strings.TrimSpace(stderr.String()))
		}
		return err
	}
	return nil
}
// // Used to deser the yt-dlp -J output
// type DownloadInfo struct {
// URL string `json:"url"`
// Title string `json:"title"`
// Thumbnail string `json:"thumbnail"`
// Resolution string `json:"resolution"`
// Size int32 `json:"filesize_approx"`
// VCodec string `json:"vcodec"`
// ACodec string `json:"acodec"`
// Extension string `json:"ext"`
// OriginalURL string `json:"original_url"`
// CreatedAt time.Time `json:"created_at"`
// }
// // Process descriptor
// type Process struct {
// Id string
// Url string
// Params []string
// OutputDir string
// Info DownloadInfo
// Progress DownloadProgress
// proc *os.Process
// Logger *slog.Logger
// }
// func NewProcess(dir string) (*Process, error) {
// }
// // Start spawns/forks a new yt-dlp process and parses its stdout.
// // The process is spawned to output a custom progress text that
// // resembles a JSON object so that it can be unmarshalled later.
// // This approach is not perfect, however: quotes are not escaped properly.
// // Each process is identified not by its PID but by a UUIDv4.
// func (p *Process) Start() {
// // escape bash variable escaping and command piping, you'll never know
// // what they might come with...
// p.Params = slices.DeleteFunc(p.Params, func(e string) bool {
// match, _ := regexp.MatchString(`(\$\{)|(\&\&)`, e)
// return match
// })
// p.Params = slices.DeleteFunc(p.Params, func(e string) bool {
// return e == ""
// })
// if p.Output.Path != "" {
// out.Path = p.Output.Path
// }
// if p.Output.Filename != "" {
// out.Filename = p.Output.Filename
// }
// buildFilename(&p.Output)
// go p.GetFileName(&out)
// params := []string{
// strings.Split(p.Url, "?list")[0], //no playlist
// "--newline",
// "--no-colors",
// "--no-playlist",
// "--progress-template",
// strings.NewReplacer("\n", "", "\t", "", " ", "").Replace(template),
// }
// // if user asked to manually override the output path...
// if !(slices.Contains(params, "-P") || slices.Contains(params, "--paths")) {
// params = append(params, "-o")
// params = append(params, fmt.Sprintf("%s/%s", out.Path, out.Filename))
// }
// params = append(params, p.Params...)
// // ----------------- main block ----------------- //
// cmd := exec.Command(config.Instance().DownloaderPath, params...)
// cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
// r, err := cmd.StdoutPipe()
// if err != nil {
// p.Logger.Error(
// "failed to connect to stdout",
// slog.String("err", err.Error()),
// )
// panic(err)
// }
// err = cmd.Start()
// if err != nil {
// p.Logger.Error(
// "failed to start yt-dlp process",
// slog.String("err", err.Error()),
// )
// panic(err)
// }
// p.proc = cmd.Process
// // --------------- progress block --------------- //
// var (
// sourceChan = make(chan []byte)
// doneChan = make(chan struct{})
// )
// // spawn a goroutine that does the dirty job of parsing the stdout
// // filling the channel with as many stdout line as yt-dlp produces (producer)
// go func() {
// scan := bufio.NewScanner(r)
// defer func() {
// r.Close()
// p.Complete()
// doneChan <- struct{}{}
// close(sourceChan)
// close(doneChan)
// }()
// for scan.Scan() {
// sourceChan <- scan.Bytes()
// }
// }()
// // Slows down the unmarshal operation to every 500ms
// go func() {
// rx.Sample(time.Millisecond*500, sourceChan, doneChan, func(event []byte) {
// var progress ProgressTemplate
// if err := json.Unmarshal(event, &progress); err != nil {
// return
// }
// p.Progress = DownloadProgress{
// Status: StatusDownloading,
// Percentage: progress.Percentage,
// Speed: progress.Speed,
// ETA: progress.Eta,
// }
// p.Logger.Info("progress",
// slog.String("id", p.getShortId()),
// slog.String("url", p.Url),
// slog.String("percentage", progress.Percentage),
// )
// })
// }()
// // ------------- end progress block ------------- //
// cmd.Wait()
// }
// // Complete keeps the process in the memoryDB but marks it as complete.
// // Convention: all completed processes have progress -1
// // and speed 0 bps.
// func (p *Process) Complete() {
// p.Progress = DownloadProgress{
// Status: StatusCompleted,
// Percentage: "-1",
// Speed: 0,
// ETA: 0,
// }
// p.Logger.Info("finished",
// slog.String("id", p.getShortId()),
// slog.String("url", p.Url),
// )
// }
// // Kill a process and remove it from the memory
// func (p *Process) Kill() error {
// // yt-dlp uses multiple child processes, so the parent process
// // has been spawned with Setpgid = true. To properly kill
// // all subprocesses, a SIGTERM needs to be sent to the correct
// // process group.
// if p.proc != nil {
// pgid, err := syscall.Getpgid(p.proc.Pid)
// if err != nil {
// return err
// }
// err = syscall.Kill(-pgid, syscall.SIGTERM)
// p.Logger.Info("killed process", slog.String("id", p.Id))
// return err
// }
// return nil
// }
// // Returns the available format for this URL
// func (p *Process) GetFormatsSync() (DownloadFormats, error) {
// cmd := exec.Command(config.Instance().DownloaderPath, p.Url, "-J")
// stdout, err := cmd.Output()
// if err != nil {
// p.Logger.Error(
// "failed to retrieve metadata",
// slog.String("err", err.Error()),
// )
// return DownloadFormats{}, err
// }
// info := DownloadFormats{URL: p.Url}
// best := Format{}
// var (
// wg sync.WaitGroup
// decodingError error
// )
// wg.Add(2)
// log.Println(
// cli.BgRed, "Metadata", cli.Reset,
// cli.BgBlue, "Formats", cli.Reset,
// p.Url,
// )
// p.Logger.Info(
// "retrieving metadata",
// slog.String("caller", "getFormats"),
// slog.String("url", p.Url),
// )
// go func() {
// decodingError = json.Unmarshal(stdout, &info)
// wg.Done()
// }()
// go func() {
// decodingError = json.Unmarshal(stdout, &best)
// wg.Done()
// }()
// wg.Wait()
// if decodingError != nil {
// return DownloadFormats{}, err
// }
// info.Best = best
// return info, nil
// }
// func (p *Process) GetFileName(o *DownloadOutput) error {
// cmd := exec.Command(
// config.Instance().DownloaderPath,
// "--print", "filename",
// "-o", fmt.Sprintf("%s/%s", o.Path, o.Filename),
// p.Url,
// )
// cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
// out, err := cmd.Output()
// if err != nil {
// return err
// }
// p.Output.SavedFilePath = strings.Trim(string(out), "\n")
// return nil
// }
// func (p *Process) SetPending() {
// // Since video's title isn't available yet, fill in with the URL.
// p.Info = DownloadInfo{
// URL: p.Url,
// Title: p.Url,
// CreatedAt: time.Now(),
// }
// p.Progress.Status = StatusPending
// }
// func (p *Process) SetMetadata() error {
// cmd := exec.Command(config.Instance().DownloaderPath, p.Url, "-J")
// cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
// stdout, err := cmd.StdoutPipe()
// if err != nil {
// p.Logger.Error("failed to connect to stdout",
// slog.String("id", p.getShortId()),
// slog.String("url", p.Url),
// slog.String("err", err.Error()),
// )
// return err
// }
// stderr, err := cmd.StderrPipe()
// if err != nil {
// p.Logger.Error("failed to connect to stderr",
// slog.String("id", p.getShortId()),
// slog.String("url", p.Url),
// slog.String("err", err.Error()),
// )
// return err
// }
// info := DownloadInfo{
// URL: p.Url,
// CreatedAt: time.Now(),
// }
// if err := cmd.Start(); err != nil {
// return err
// }
// var bufferedStderr bytes.Buffer
// go func() {
// io.Copy(&bufferedStderr, stderr)
// }()
// p.Logger.Info("retrieving metadata",
// slog.String("id", p.getShortId()),
// slog.String("url", p.Url),
// )
// if err := json.NewDecoder(stdout).Decode(&info); err != nil {
// return err
// }
// p.Info = info
// p.Progress.Status = StatusPending
// if err := cmd.Wait(); err != nil {
// return errors.New(bufferedStderr.String())
// }
// return nil
// }
// func (p *Process) getShortId() string {
// return strings.Split(p.Id, "-")[0]
// }
// func buildFilename(o *DownloadOutput) {
// if o.Filename != "" && strings.Contains(o.Filename, ".%(ext)s") {
// o.Filename += ".%(ext)s"
// }
// o.Filename = strings.Replace(
// o.Filename,
// ".%(ext)s.%(ext)s",
// ".%(ext)s",
// 1,
// )
// }

View file

@ -0,0 +1,26 @@
package ytdlp_test
import (
"context"
"fmt"
"testing"
"git.kmsign.ru/royalcat/tstor/pkg/ytdlp"
"github.com/royalcat/ctxprogress"
"github.com/stretchr/testify/require"
)
// TestYtDlp downloads a single video through Client.Download and prints every
// progress update it receives. It needs network access and a yt-dlp binary.
func TestYtDlp(t *testing.T) {
	require := require.New(t)

	ctx := context.Background()
	// Stop yt-dlp when the test binary's own deadline is reached instead of
	// leaving the child process running until the test is killed.
	if deadline, ok := t.Deadline(); ok {
		var cancel context.CancelFunc
		ctx, cancel = context.WithDeadline(ctx, deadline)
		defer cancel()
	}

	c, err := ytdlp.New()
	require.NoError(err)

	ctx = ctxprogress.New(ctx)
	ctxprogress.AddCallback(ctx, func(p ctxprogress.Progress) {
		cur, total := p.Progress()
		fmt.Printf("%d/%d\n", cur, total)
	})

	// Download into a per-test temporary directory so that no files are left
	// behind in the package directory; the testing package removes it.
	err = c.Download(ctx, "https://www.youtube.com/watch?v=dQw4w9WgXcQ", t.TempDir())
	require.NoError(err)
}

165
pkg/ytdlp/model.go Normal file
View file

@ -0,0 +1,165 @@
package ytdlp
// PlaylistEntry is the Go representation of one JSON object decoded by
// Client.Playlist; each field is filled from the key named in its json tag.
// Keys present in the JSON but not declared here are dropped by encoding/json.
// NOTE(review): the field set and the narrow helper types (EXT, Accept, ...)
// look generated from a sample response — numeric fields typed int64 will fail
// to decode if yt-dlp ever emits a fractional value; confirm against real output.
type PlaylistEntry struct {
ID string `json:"id"`
Uploader string `json:"uploader"`
UploaderID string `json:"uploader_id"`
UploadDate string `json:"upload_date"`
Title string `json:"title"`
Thumbnail string `json:"thumbnail"`
Duration int64 `json:"duration"`
LikeCount int64 `json:"like_count"`
DislikeCount int64 `json:"dislike_count"`
CommentCount int64 `json:"comment_count"`
Formats []Format `json:"formats"`
AgeLimit int64 `json:"age_limit"`
Tags []string `json:"tags"`
Categories []string `json:"categories"`
Cast []any `json:"cast"`
Subtitles Subtitles `json:"subtitles"`
Thumbnails []Thumbnail `json:"thumbnails"`
Timestamp int64 `json:"timestamp"`
ViewCount int64 `json:"view_count"`
WebpageURL string `json:"webpage_url"`
OriginalURL string `json:"original_url"`
WebpageURLBasename string `json:"webpage_url_basename"`
WebpageURLDomain string `json:"webpage_url_domain"`
Extractor string `json:"extractor"`
ExtractorKey string `json:"extractor_key"`
// PlaylistCount and PlaylistIndex are what Progress reports as total and
// current respectively.
PlaylistCount int64 `json:"playlist_count"`
Playlist string `json:"playlist"`
PlaylistID string `json:"playlist_id"`
PlaylistTitle string `json:"playlist_title"`
PlaylistUploader string `json:"playlist_uploader"`
PlaylistUploaderID string `json:"playlist_uploader_id"`
NEntries int64 `json:"n_entries"`
PlaylistIndex int64 `json:"playlist_index"`
PlaylistAutonumber int64 `json:"playlist_autonumber"`
DisplayID string `json:"display_id"`
Fulltitle string `json:"fulltitle"`
DurationString string `json:"duration_string"`
ReleaseYear int `json:"release_year"`
Epoch int64 `json:"epoch"`
FormatID string `json:"format_id"`
URL string `json:"url"`
ManifestURL string `json:"manifest_url"`
Tbr float64 `json:"tbr"`
EXT EXT `json:"ext"`
FPS float64 `json:"fps"`
Protocol Protocol `json:"protocol"`
VideoHasDRM bool `json:"has_drm"`
Width int64 `json:"width"`
Height int64 `json:"height"`
Vcodec string `json:"vcodec"`
Acodec string `json:"acodec"`
DynamicRange DynamicRange `json:"dynamic_range"`
Resolution string `json:"resolution"`
AspectRatio float64 `json:"aspect_ratio"`
HTTPHeaders HTTPHeaders `json:"http_headers"`
VideoEXT EXT `json:"video_ext"`
AudioEXT AudioEXT `json:"audio_ext"`
Format string `json:"format"`
// Filename is read from the "_filename" key, VideoFilename from "filename".
Filename string `json:"_filename"`
VideoFilename string `json:"filename"`
Type string `json:"_type"`
Version Version `json:"_version"`
}
// Progress implements ctxprogress.Progress. The entry's position in its
// playlist is reported as current and the playlist's entry count as total.
func (p PlaylistEntry) Progress() (current int, total int) {
	current = int(p.PlaylistIndex)
	total = int(p.PlaylistCount)
	return current, total
}
// Format is one element of PlaylistEntry.Formats, decoded from the "formats"
// JSON array. Pointer-typed fields stay nil when the key is absent or null.
// Fields typed any carry no fixed Go type here and receive encoding/json's
// default mapping (float64, string, bool, nil, ...).
type Format struct {
URL string `json:"url"`
FormatID string `json:"format_id"`
Height int64 `json:"height"`
EXT EXT `json:"ext"`
Protocol Protocol `json:"protocol"`
Resolution string `json:"resolution"`
DynamicRange DynamicRange `json:"dynamic_range"`
AspectRatio *float64 `json:"aspect_ratio"`
FilesizeApprox any `json:"filesize_approx"`
HTTPHeaders HTTPHeaders `json:"http_headers"`
VideoEXT EXT `json:"video_ext"`
AudioEXT AudioEXT `json:"audio_ext"`
Vbr any `json:"vbr"`
ABR any `json:"abr"`
Tbr *float64 `json:"tbr"`
Format string `json:"format"`
FormatIndex any `json:"format_index"`
ManifestURL *string `json:"manifest_url,omitempty"`
FPS *float64 `json:"fps,omitempty"`
Preference any `json:"preference"`
Quality any `json:"quality"`
HasDRM *bool `json:"has_drm,omitempty"`
Width *int64 `json:"width,omitempty"`
Vcodec *string `json:"vcodec,omitempty"`
Acodec *string `json:"acodec,omitempty"`
}
// HTTPHeaders holds the four header values decoded from an "http_headers"
// JSON object; any other header key in that object is ignored when decoding.
type HTTPHeaders struct {
UserAgent string `json:"User-Agent"`
Accept Accept `json:"Accept"`
AcceptLanguage AcceptLanguage `json:"Accept-Language"`
SECFetchMode SECFetchMode `json:"Sec-Fetch-Mode"`
}
// Subtitles is the target type for the "subtitles" JSON key. It declares no
// fields, so whatever that object contains is discarded when decoding.
type Subtitles struct {
}
// Thumbnail is one element of PlaylistEntry.Thumbnails: the "url" and "id"
// keys of an entry in the "thumbnails" JSON array.
type Thumbnail struct {
URL string `json:"url"`
ID string `json:"id"`
}
// Version is decoded from the "_version" JSON object of a PlaylistEntry.
// NOTE(review): presumably describes the yt-dlp build that produced the
// output — confirm against yt-dlp's JSON documentation.
type Version struct {
Version string `json:"version"`
CurrentGitHead string `json:"current_git_head"`
ReleaseGitHead string `json:"release_git_head"`
Repository string `json:"repository"`
}
// AudioEXT is the string type used for the "audio_ext" JSON field.
type AudioEXT string

// None is the AudioEXT value "none".
const None = AudioEXT("none")
// DynamicRange is the string type used for the "dynamic_range" JSON field.
type DynamicRange string

// Dynamic range values declared for DynamicRange.
const (
	SDR = DynamicRange("SDR")
	HDR = DynamicRange("HDR")
)
// EXT is the string type used for the "ext" and "video_ext" JSON fields.
type EXT string

// Mp4 is the EXT value "mp4".
const Mp4 = EXT("mp4")
// Accept is the string type used for the "Accept" key of HTTPHeaders.
type Accept string

// TextHTMLApplicationXHTMLXMLApplicationXMLQ09Q08 is the one Accept header
// value declared by this package.
const TextHTMLApplicationXHTMLXMLApplicationXMLQ09Q08 = Accept("text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
// AcceptLanguage is the string type used for the "Accept-Language" key of
// HTTPHeaders.
type AcceptLanguage string

// EnUsEnQ05 is the AcceptLanguage value "en-us,en;q=0.5".
const EnUsEnQ05 = AcceptLanguage("en-us,en;q=0.5")
// SECFetchMode is the string type used for the "Sec-Fetch-Mode" key of
// HTTPHeaders.
type SECFetchMode string

// Navigate is the SECFetchMode value "navigate".
const Navigate = SECFetchMode("navigate")
// Protocol is the string type used for the "protocol" JSON field.
type Protocol string

// Protocol values declared by this package.
const (
	HTTPS      = Protocol("https")
	M3U8Native = Protocol("m3u8_native")
)

164
pkg/ytdlp/playlist.go Normal file
View file

@ -0,0 +1,164 @@
package ytdlp
import (
"bufio"
"context"
"encoding/json"
"errors"
"io"
"os"
"os/exec"
"strings"
"github.com/royalcat/ctxprogress"
"golang.org/x/sync/errgroup"
)
// Client runs the yt-dlp command-line program.
type Client struct {
	// binary is the program name or path handed to exec.CommandContext.
	binary string
}

// New returns a Client that invokes "yt-dlp". The returned error is always
// nil.
func New() (*Client, error) {
	client := new(Client)
	client.binary = "yt-dlp"
	return client, nil
}
// Playlist runs "yt-dlp -j url" and decodes every line the process prints on
// stdout into a PlaylistEntry.
//
// It returns only after the yt-dlp process and the goroutine reading its
// output have finished. An error is returned when a line cannot be decoded
// (the process is then stopped and the decode error is returned), or when the
// command fails to start or exits unsuccessfully. On error the returned slice
// is nil.
func (yt *Client) Playlist(ctx context.Context, url string) ([]PlaylistEntry, error) {
	// cancel lets us stop yt-dlp early when its output turns out to be unusable.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	group, ctx := errgroup.WithContext(ctx)
	w, lines, err := lineReader(group)
	if err != nil {
		return nil, err
	}

	cmd := exec.CommandContext(ctx, yt.binary, "-j", url)
	cmd.Stdout = w

	group.Go(func() error {
		// The writer must be closed even when Run fails; otherwise the reader
		// never sees EOF, lines is never closed and the loop below blocks
		// forever.
		defer w.Close()
		return cmd.Run()
	})

	playlists := []PlaylistEntry{}
	var parseErr error
	for line := range lines {
		if parseErr != nil {
			// Keep draining so the reader goroutine and the process are not
			// left blocked on a channel/pipe nobody consumes.
			continue
		}
		entry := PlaylistEntry{}
		if err := json.Unmarshal([]byte(line), &entry); err != nil {
			parseErr = err
			cancel()
			continue
		}
		playlists = append(playlists, entry)
	}

	// Wait so that command failures are reported and no goroutine outlives
	// this call.
	waitErr := group.Wait()
	if parseErr != nil {
		return nil, parseErr
	}
	if waitErr != nil {
		return nil, waitErr
	}
	return playlists, nil
}
// func DownloadPlaylist(ctx context.Context, url string, dir string) error {
// args := []string{
// "--no-simulate", "-j",
// "--progress", "--newline", "--progress-template", progressTemplate,
// "-o", path.Join(dir, "%(title)s.%(ext)s"),
// url,
// }
// group, ctx := errgroup.WithContext(ctx)
// pr, w := io.Pipe()
// cmd := exec.CommandContext(ctx, "yt-dlp", args...)
// cmd.Stdout = w
// r := io.TeeReader(pr, os.Stdout)
// group.Go(func() error {
// reader := bufio.NewReader(r)
// for {
// line, err := reader.ReadString('\n')
// if err != nil {
// if err == io.EOF {
// return nil
// }
// return err
// }
// line = strings.Trim(line, " \r\t")
// if len(line) == 0 {
// continue
// }
// if strings.HasPrefix(line, "{") {
// item := &PlaylistEntry{}
// err = json.Unmarshal([]byte(line), &item)
// if err != nil {
// return err
// }
// } else if body, ok := strings.CutPrefix(line, "%"); ok {
// p := &DownloadProgress{}
// err = json.Unmarshal([]byte(body), &p)
// if err != nil {
// return err
// }
// } else {
// return fmt.Errorf("Failed to parse output, unknown first symbol: %v", string([]rune(line)[0]))
// }
// }
// })
// group.Go(func() error {
// err := cmd.Run()
// defer w.Close()
// if err != nil {
// return err
// }
// return nil
// })
// return group.Wait()
// }
// lineReader creates an in-memory pipe and starts a goroutine in group that
// splits everything written to the pipe into lines.
//
// It returns the write end of the pipe and a channel on which each non-blank
// line is delivered with surrounding whitespace (including the line
// terminator) removed. The channel is unbuffered and is closed when the
// goroutine ends, which happens once the writer has been closed and all
// pending data has been consumed, or when reading fails. The caller must keep
// receiving until the channel is closed; otherwise the goroutine stays blocked
// on the send. The returned error is always nil.
//
// NOTE(review): every byte read from the pipe is also copied to this
// process's os.Stdout via the TeeReader — looks like a debugging aid; confirm
// it is wanted in library code.
func lineReader(group *errgroup.Group) (io.WriteCloser, <-chan string, error) {
	lines := make(chan string)

	pr, w := io.Pipe()
	r := io.TeeReader(pr, os.Stdout)

	group.Go(func() error {
		defer close(lines)

		reader := bufio.NewReader(r)
		for {
			raw, err := reader.ReadString('\n')
			if err != nil && err != io.EOF {
				return errors.Join(err, w.Close())
			}

			// ReadString keeps the trailing '\n', so it has to be trimmed as
			// well for blank lines to be recognised. At EOF raw holds a final
			// line that had no terminator; it is delivered like any other.
			if line := strings.TrimSpace(raw); line != "" {
				lines <- line
			}

			if err == io.EOF {
				return w.Close()
			}
		}
	})

	return w, lines, nil
}
// Compile-time assertions that *PlaylistEntry and *DownloadProgress satisfy
// ctxprogress.Progress.
var _ ctxprogress.Progress = (*PlaylistEntry)(nil)
var _ ctxprogress.Progress = (*DownloadProgress)(nil)
// parseProgress decodes line as a JSON-encoded DownloadProgress after
// stripping spaces, carriage returns and tabs from both ends. It returns the
// decoded value as a ctxprogress.Progress, or a nil Progress together with
// the json.Unmarshal error.
func parseProgress(line string) (ctxprogress.Progress, error) {
	var progress DownloadProgress

	payload := []byte(strings.Trim(line, " \r\t"))
	if err := json.Unmarshal(payload, &progress); err != nil {
		return nil, err
	}
	return &progress, nil
}

View file

@ -0,0 +1,27 @@
package ytdlp_test
import (
"context"
"errors"
"testing"
"git.kmsign.ru/royalcat/tstor/pkg/ytdlp"
"github.com/stretchr/testify/require"
)
// TestPlaylist lists a public playlist through Client.Playlist and checks
// that the number of returned entries equals the playlist count reported by
// the first entry. It needs network access and a yt-dlp binary.
func TestPlaylist(t *testing.T) {
	ctx := context.Background()
	// Bound the run by the test binary's deadline, when there is one.
	if deadline, hasDeadline := t.Deadline(); hasDeadline {
		bounded, cancel := context.WithDeadlineCause(ctx, deadline, errors.New("test deadline done"))
		defer cancel()
		ctx = bounded
	}

	assert := require.New(t)

	client, err := ytdlp.New()
	assert.NoError(err)

	entries, err := client.Playlist(ctx, "https://www.youtube.com/playlist?list=PLUay9m6GhoyCXdloEa-VYtnVeshaKl4AW")
	assert.NoError(err)

	assert.NotEmpty(entries)
	wantCount := int(entries[0].PlaylistCount)
	assert.Len(entries, wantCount)
}