this is a pretty clean versio, but there needs to be a lot done

2020-06-25 17:55:33 +02:00
parent ec32421d92
commit 001536c975
1 changed files with 30 additions and 12 deletions
--- a/gen-script.py
+++ b/gen-script.py
@ -1,15 +1,22 @@
 #!/usr/bin/python

+#TODO give an option to use longer version
+# add subtitles download support and conversion
+# support different subtitles extensions
+# add video download support
+# support subs and vids renaming
+# make program organize file
+# add alternative versions
+# passing arguments into program; get rid of hardcoded values
+# fix def write_debug_file(data), so it generates correct values
+
+# check if you tell ffmpeg after the video is over - how does it handle it?
+# if yes, add r_clamp(.). I would have to get the length of the videos
+
 import re
 import os
 from datetime import timedelta

-#TODO give an option to use longer version
-# the fast version should be for testing only a
-# implement fast better quality version. will have to use
-# make it so the application dowloads subtitles and videos and names them correctly
-#check if you tell ffmpeg after the video is over - how does it handle it?
-
 # left clamps. used to make sure that eg. 00:00:02 doesn't become 23:59:52
 def l_clamps(t, delta):
    if t < delta:
@ -19,7 +26,7 @@ def l_clamps(t, delta):

 # if two chunks overlap, they get merged
 def merge_overlap(data):
-    prev=data[-1] # it should work for now, but i should find a better solution
+    prev=data[-1] # might delete this line late
    #this also works, but for surely there's a better way to do this
    for i, cur in enumerate(data):
        if prev['fname']==cur['fname'] and prev['end'] > cur['beg'] - sides: # that means we have to merge them
@ -28,7 +35,7 @@ def merge_overlap(data):
            data[i-1]={}
        prev = cur

-
+# it collects data of clips which contain searched phrase
 def generate_splice_data(file_names):
    data = []
    for i, f in enumerate(file_names):
@ -38,13 +45,15 @@ def generate_splice_data(file_names):
                data.append({
                'fname':f,                                                                                  #file name
                'beg':l_clamps(timedelta(minutes = int(prev[3  :5]), seconds=int(prev[6:  8])), sides),     #beginning timestamp
-                'end':timedelta(minutes = int(prev[20:22]), seconds=int(prev[23:25])),                      #ending timestamp
+                'end':timedelta(minutes = int(prev[20:22]), seconds=int(prev[23:25])) + sides,              #ending timestamp
                'desc':line[:-1]})                                                                          #lines said
            if i%4 == 1:
                prev = line # a timestamp line
    return data


+# it generates rip.sh - script which cut nessecsary part
+# i dunno how to spell nessessery
 def write_list_rip(data):
    file_list = open('list.txt', 'w')
    file_rip = open('rip.sh', 'w')
@ -55,11 +64,19 @@ def write_list_rip(data):
            outname ="../out/{}-{:0>2}.mp4".format(name, i)

            file_list.write("file '{}'\n".format(outname))
-            delta=d['end']-d['beg']+timedelta(seconds=6)
-            file_rip .write('ffmpeg -ss {} -i ../original/{}.mp4 -to 0{} -c copy {}\n'.format(d['beg'], name, d['end']-d['beg'],outname)) # fast but not accurate
-            # file_rip .write('ffmpeg -i ../original/{}.mp4 -ss 0{} -t 0{} -async 1 {}\n'.format(name, d['beg'], delta ,outname)) # slow but more accurate
+            delta = d['end']-d['beg']
+
+            # painfully slow
+            # file_rip.write('ffmpeg -i ../original/{}.mp4 -ss 0{} -t 0{} -async 1 {}\n'.format(name, d['beg'], delta ,outname)) # slow but more accurate
+
+            # very fast, but there are still 'bugs'
+            true_beg = l_clamps(d['beg'], back)
+            d1 = d['beg'] - true_beg # we have to do it his was just in case the clip is at the beginning
+            file_rip.write('ffmpeg -ss 0{} -i ../original/{}.mp4 -ss 0{} -t 0{} -c copy {}\n'.format(true_beg, name, d1, delta, outname)) # fast but not accurate


+# generates a debug file - I can see how the clips are laid out
+# at the moment it doesnt work correcty. fix that
 def write_debug_file(data):
    file_debug = open('debug.txt', 'w')
    for d in data:
@ -69,6 +86,7 @@ def write_debug_file(data):
 if __name__ == "__main__":
    search = '(pesky bird)' # the searched phrase
    sides = timedelta(seconds=10) # time we cut on both sides
+    back  = timedelta(seconds=60) # how much do we want to go to get better keyframes. dunno how to explain this in two sentences
    path = os.getcwd() + '/../subs/' # subtitles location

    file_names = sorted(os.listdir(path))