From 001536c975468526f124e10e431617e1e7d85e76 Mon Sep 17 00:00:00 2001 From: kolaczyn Date: Thu, 25 Jun 2020 17:55:33 +0200 Subject: [PATCH] this is a pretty clean versio, but there needs to be a lot done --- gen-script.py | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/gen-script.py b/gen-script.py index 857a526..d2732dc 100755 --- a/gen-script.py +++ b/gen-script.py @@ -1,15 +1,22 @@ #!/usr/bin/python +#TODO give an option to use longer version +# add subtitles download support and conversion +# support different subtitles extensions +# add video download support +# support subs and vids renaming +# make program organize file +# add alternative versions +# passing arguments into program; get rid of hardcoded values +# fix def write_debug_file(data), so it generates correct values + +# check if you tell ffmpeg after the video is over - how does it handle it? +# if yes, add r_clamp(.). I would have to get the length of the videos + import re import os from datetime import timedelta -#TODO give an option to use longer version -# the fast version should be for testing only a -# implement fast better quality version. will have to use -# make it so the application dowloads subtitles and videos and names them correctly -#check if you tell ffmpeg after the video is over - how does it handle it? - # left clamps. used to make sure that eg. 00:00:02 doesn't become 23:59:52 def l_clamps(t, delta): if t < delta: @@ -19,7 +26,7 @@ def l_clamps(t, delta): # if two chunks overlap, they get merged def merge_overlap(data): - prev=data[-1] # it should work for now, but i should find a better solution + prev=data[-1] # might delete this line late #this also works, but for surely there's a better way to do this for i, cur in enumerate(data): if prev['fname']==cur['fname'] and prev['end'] > cur['beg'] - sides: # that means we have to merge them @@ -28,7 +35,7 @@ def merge_overlap(data): data[i-1]={} prev = cur - +# it collects data of clips which contain searched phrase def generate_splice_data(file_names): data = [] for i, f in enumerate(file_names): @@ -38,13 +45,15 @@ def generate_splice_data(file_names): data.append({ 'fname':f, #file name 'beg':l_clamps(timedelta(minutes = int(prev[3 :5]), seconds=int(prev[6: 8])), sides), #beginning timestamp - 'end':timedelta(minutes = int(prev[20:22]), seconds=int(prev[23:25])), #ending timestamp + 'end':timedelta(minutes = int(prev[20:22]), seconds=int(prev[23:25])) + sides, #ending timestamp 'desc':line[:-1]}) #lines said if i%4 == 1: prev = line # a timestamp line return data +# it generates rip.sh - script which cut nessecsary part +# i dunno how to spell nessessery def write_list_rip(data): file_list = open('list.txt', 'w') file_rip = open('rip.sh', 'w') @@ -55,11 +64,19 @@ def write_list_rip(data): outname ="../out/{}-{:0>2}.mp4".format(name, i) file_list.write("file '{}'\n".format(outname)) - delta=d['end']-d['beg']+timedelta(seconds=6) - file_rip .write('ffmpeg -ss {} -i ../original/{}.mp4 -to 0{} -c copy {}\n'.format(d['beg'], name, d['end']-d['beg'],outname)) # fast but not accurate - # file_rip .write('ffmpeg -i ../original/{}.mp4 -ss 0{} -t 0{} -async 1 {}\n'.format(name, d['beg'], delta ,outname)) # slow but more accurate + delta = d['end']-d['beg'] + + # painfully slow + # file_rip.write('ffmpeg -i ../original/{}.mp4 -ss 0{} -t 0{} -async 1 {}\n'.format(name, d['beg'], delta ,outname)) # slow but more accurate + + # very fast, but there are still 'bugs' + true_beg = l_clamps(d['beg'], back) + d1 = d['beg'] - true_beg # we have to do it his was just in case the clip is at the beginning + file_rip.write('ffmpeg -ss 0{} -i ../original/{}.mp4 -ss 0{} -t 0{} -c copy {}\n'.format(true_beg, name, d1, delta, outname)) # fast but not accurate +# generates a debug file - I can see how the clips are laid out +# at the moment it doesnt work correcty. fix that def write_debug_file(data): file_debug = open('debug.txt', 'w') for d in data: @@ -69,6 +86,7 @@ def write_debug_file(data): if __name__ == "__main__": search = '(pesky bird)' # the searched phrase sides = timedelta(seconds=10) # time we cut on both sides + back = timedelta(seconds=60) # how much do we want to go to get better keyframes. dunno how to explain this in two sentences path = os.getcwd() + '/../subs/' # subtitles location file_names = sorted(os.listdir(path))