corbin@infosec.pub to Open Source@lemmy.ml · English · 5 months ago · Yt-dlp is the best way to download videos and audio · www.spacebar.news (external link) · 88 comments · 515 upvotes · 9 downvotes
Score: 506 · Yt-dlp is the best way to download videos and audio · www.spacebar.news (external link) · corbin@infosec.pub to Open Source@lemmy.ml · English · 5 months ago · 88 comments
minus-square☆ Yσɠƚԋσʂ ☆@lemmy.mlcakelinkfedilinkarrow-up6·5 months agoI made a script for grabbing reddit videos that’s been working pretty well for me, needs Babashka to run https://babashka.org/ #!/usr/bin/env bb (require '[clojure.java.shell :refer [sh]] '[clojure.string :as string] '[cheshire.core :as cheshire] '[org.httpkit.client :as http] '[clojure.walk :as walk]) (defn http-get [url] (-> @(http/get url {}) :body)) (defn find-base-url [data] (let [results (atom [])] (walk/postwalk (fn [node] (when (and (string? node) (.contains node "DASH")) (swap! results conj node)) node) data) (some-> @results first (string/replace #"DASH_[0-9]+\.mp4" "")))) (defn find-best-quality [names audio?] (->> ((if audio? filter remove) #(.contains (.toLowerCase %) "audio") names) (sort-by (fn [n] (-> n (string/replace #"\.mp4" "") (string/replace #"[a-zA-Z_]" "") (Integer/parseInt)))) (last))) (defn find-parts [base-url data] (let [url (atom nil) _ (walk/prewalk (fn [node] (when (and (map? node) (contains? node :dash_url)) (reset! url (:dash_url node))) node) data) xml (http-get @url) parts (->> (re-seq #"<BaseURL>(.*?)</BaseURL>" xml) (map second)) best-video (find-best-quality parts false) best-audio (find-best-quality parts true)] [(str base-url best-video) (str base-url best-audio)])) (defn filename [url] (let [idx (inc (.lastIndexOf url "/"))] (subs url idx))) (defn tsname [] (str "video-" (System/currentTimeMillis) ".mp4")) (let [data (-> (first *command-line-args*) (str ".json") http-get (cheshire/decode true)) base-url (find-base-url data) [video-url audio-url] (find-parts base-url data) video-file (filename video-url) audio-file (filename audio-url)] (sh "wget" video-url) (sh "wget" audio-url) (sh "ffmpeg" "-i" video-file "-stream_loop" "-1" "-i" audio-file "-shortest" "-map" "0:v:0" "-map" "1:a:0" "-y" (tsname)) (sh "rm" audio-file video-file))
I made a script for grabbing Reddit videos that’s been working pretty well for me. It needs Babashka (https://babashka.org/) to run.
#!/usr/bin/env bb
;; Download the video attached to a Reddit post and mux it with its audio track.
;;
;; Usage: <this-script> <reddit-post-url>
;;
;; Shells out to `wget` and `ffmpeg`, so both must be on PATH. The result is
;; written to the current directory as video-<epoch-millis>.mp4.

(require '[clojure.java.shell :refer [sh]]
         '[clojure.string :as string]
         '[cheshire.core :as cheshire]
         '[org.httpkit.client :as http]
         '[clojure.walk :as walk])

(defn http-get
  "GETs `url` and returns the response body.

  Throws ex-info when the request itself fails or the response status is not
  2xx, so callers never try to parse a missing body or an error page."
  [url]
  (let [{:keys [status body error]} @(http/get url {})]
    (cond
      error
      (throw (ex-info (str "Request failed: " url) {:url url} error))

      (not (and status (<= 200 status 299)))
      (throw (ex-info (str "HTTP " status " for " url) {:url url :status status}))

      :else body)))

(defn- nodes
  "Returns every node of the nested collection `data` in depth-first pre-order."
  [data]
  (tree-seq coll? seq data))

(defn find-base-url
  "Returns the directory part (ending in \"/\") of the first http(s) URL in
  `data` whose last path segment starts with \"DASH\", or nil when there is none.

  Only the part before \"DASH\" is kept, so a trailing query string such as
  `?source=fallback` or a playlist name such as `DASHPlaylist.mpd` cannot leak
  into the base URL."
  [data]
  (->> (nodes data)
       (filter string?)
       (some #(second (re-find #"^(https?://[^\s?#]*/)DASH" %)))))

(defn- quality-rank
  "Numeric quality encoded in a stream file name (\"DASH_720.mp4\" -> 720).

  Names that leave no parseable number once the extension, letters and
  underscores are removed (e.g. \"DASH_audio.mp4\") rank as 0 instead of
  throwing."
  [n]
  (let [digits (-> n
                   (string/replace #"\.mp4" "")
                   (string/replace #"[a-zA-Z_]" ""))]
    (try
      (Integer/parseInt digits)
      (catch NumberFormatException _ 0))))

(defn find-best-quality
  "Picks the highest-ranked name from `names`.

  When `audio?` is truthy only names containing \"audio\" (case-insensitive)
  are considered; otherwise only names that do not contain it. Returns nil
  when no name qualifies."
  [names audio?]
  (->> names
       ((if audio? filter remove)
        #(string/includes? (string/lower-case %) "audio"))
       (sort-by quality-rank)
       last))

(defn find-parts
  "Fetches the playlist referenced by the last `:dash_url` found in `data` and
  returns `[video-url audio-url]`, each built from `base-url` plus the best
  matching <BaseURL> entry of that playlist.

  `audio-url` is nil when the playlist lists no audio stream. Throws ex-info
  when `data` has no `:dash_url` or the playlist lists no video stream."
  [base-url data]
  (let [dash-url (->> (nodes data)
                      (filter #(and (map? %) (contains? % :dash_url)))
                      last
                      :dash_url)
        _ (when-not dash-url
            (throw (ex-info "No :dash_url found in post data" {})))
        xml (http-get dash-url)
        parts (->> (re-seq #"<BaseURL>(.*?)</BaseURL>" xml)
                   (map second))
        best-video (find-best-quality parts false)
        best-audio (find-best-quality parts true)]
    (when-not best-video
      (throw (ex-info "Playlist lists no video stream" {:dash-url dash-url})))
    [(str base-url best-video)
     ;; Keep nil rather than (str base-url nil), which would be the bare base URL.
     (when best-audio (str base-url best-audio))]))

(defn filename
  "Returns the part of `url` after its last \"/\" (the whole string if none)."
  [url]
  (let [idx (inc (.lastIndexOf url "/"))]
    (subs url idx)))

(defn tsname
  "Returns a fresh output file name based on the current time in milliseconds."
  []
  (str "video-" (System/currentTimeMillis) ".mp4"))

(defn- json-url
  "Turns a post URL into its `.json` endpoint, dropping any query string,
  fragment and trailing slashes first so the suffix lands on the path."
  [post-url]
  (-> post-url
      (string/replace #"[?#].*$" "")
      (string/replace #"/+$" "")
      (str ".json")))

(defn- sh!
  "Runs a command via `sh` and returns its result map; throws ex-info when the
  command exits non-zero so a failed download or mux is not silently ignored."
  [& args]
  (let [{:keys [exit err] :as result} (apply sh args)]
    (when-not (zero? exit)
      (throw (ex-info (str "Command failed (exit " exit "): " (string/join " " args))
                      {:args args :exit exit :err err})))
    result))

(defn -main
  "Entry point: downloads the best video and audio streams of the post given as
  the first argument and muxes them into a timestamped .mp4 file."
  [& args]
  (let [post-url (first args)]
    (when (string/blank? post-url)
      (binding [*out* *err*]
        (println "Usage: <script> <reddit-post-url>"))
      (System/exit 1))
    (let [data (-> post-url json-url http-get (cheshire/decode true))
          base-url (or (find-base-url data)
                       (throw (ex-info "No DASH video URL found in post" {:url post-url})))
          [video-url audio-url] (find-parts base-url data)
          video-file (filename video-url)
          audio-file (some-> audio-url filename)
          output (tsname)]
      (try
        ;; -O forces the target name; otherwise wget writes "<name>.1" next to a
        ;; stale file and ffmpeg would pick up the old one.
        (sh! "wget" "-O" video-file video-url)
        (if audio-file
          (do
            (sh! "wget" "-O" audio-file audio-url)
            ;; Audio is looped indefinitely and -shortest stops at the end of
            ;; the video stream.
            (sh! "ffmpeg" "-i" video-file
                 "-stream_loop" "-1" "-i" audio-file
                 "-shortest" "-map" "0:v:0" "-map" "1:a:0"
                 "-y" output))
          ;; No audio stream: the downloaded video is already the final file.
          (sh! "mv" video-file output))
        (finally
          ;; Remove intermediates even when a step above failed.
          (apply sh "rm" "-f" (remove nil? [audio-file video-file])))))))

;; Run only when executed as a script, not when loaded from another file or a REPL.
(when (= *file* (System/getProperty "babashka.file"))
  (apply -main *command-line-args*))