From 001536c975468526f124e10e431617e1e7d85e76 Mon Sep 17 00:00:00 2001
From: kolaczyn <kolaczyn@protonmail.com>
Date: Thu, 25 Jun 2020 17:55:33 +0200
Subject: [PATCH] this is a pretty clean version, but a lot still needs to be
 done

---
 gen-script.py | 42 ++++++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/gen-script.py b/gen-script.py
index 857a526..d2732dc 100755
--- a/gen-script.py
+++ b/gen-script.py
@@ -1,15 +1,22 @@
 #!/usr/bin/python
 
+#TODO give an option to use the longer version
+# add subtitle download and conversion support
+# support different subtitle file extensions
+# add video download support
+# support renaming of subs and vids
+# make the program organize the files
+# add alternative versions
+# pass arguments into the program; get rid of hardcoded values
+# fix write_debug_file(data), so it generates correct values
+
+# check how ffmpeg handles being told to cut past the end of the video
+# if that is a problem, add r_clamp(.). I would have to get the length of the videos
+
 import re
 import os
 from datetime import timedelta
 
-#TODO give an option to use longer version
-# the fast version should be for testing only a
-# implement fast better quality version. will have to use
-# make it so the application dowloads subtitles and videos and names them correctly
-#check if you tell ffmpeg after the video is over - how does it handle it?
-
 # left clamps. used to make sure that eg. 00:00:02 doesn't become 23:59:52
 def l_clamps(t, delta):
     if t < delta:
@@ -19,7 +26,7 @@ def l_clamps(t, delta):
 
  # if two chunks overlap, they get merged
 def merge_overlap(data):
-    prev=data[-1] # it should work for now, but i should find a better solution
+    prev=data[-1] # might delete this line later
     #this also works, but for surely there's a better way to do this
     for i, cur in enumerate(data):
         if prev['fname']==cur['fname'] and prev['end'] > cur['beg'] - sides: # that means we have to merge them
@@ -28,7 +35,7 @@ def merge_overlap(data):
             data[i-1]={}
         prev = cur
 
-
+# collects data about the clips which contain the searched phrase
 def generate_splice_data(file_names):
     data = []
     for i, f in enumerate(file_names):
@@ -38,13 +45,15 @@ def generate_splice_data(file_names):
                 data.append({
                 'fname':f,                                                                                  #file name
                 'beg':l_clamps(timedelta(minutes = int(prev[3  :5]), seconds=int(prev[6:  8])), sides),     #beginning timestamp
-                'end':timedelta(minutes = int(prev[20:22]), seconds=int(prev[23:25])),                      #ending timestamp
+                'end':timedelta(minutes = int(prev[20:22]), seconds=int(prev[23:25])) + sides,              #ending timestamp
                 'desc':line[:-1]})                                                                          #lines said
             if i%4 == 1:
                 prev = line # a timestamp line
     return data
 
 
+# generates rip.sh - a script which cuts out the necessary parts
+# also writes list.txt, which lists the output clip files
 def write_list_rip(data):
     file_list = open('list.txt', 'w')
     file_rip = open('rip.sh', 'w')
@@ -55,11 +64,19 @@ def write_list_rip(data):
             outname ="../out/{}-{:0>2}.mp4".format(name, i)
 
             file_list.write("file '{}'\n".format(outname))
-            delta=d['end']-d['beg']+timedelta(seconds=6)
-            file_rip .write('ffmpeg -ss {} -i ../original/{}.mp4 -to 0{} -c copy {}\n'.format(d['beg'], name, d['end']-d['beg'],outname)) # fast but not accurate
-            # file_rip .write('ffmpeg -i ../original/{}.mp4 -ss 0{} -t 0{} -async 1 {}\n'.format(name, d['beg'], delta ,outname)) # slow but more accurate
+            delta = d['end']-d['beg']
+
+            # painfully slow
+            # file_rip.write('ffmpeg -i ../original/{}.mp4 -ss 0{} -t 0{} -async 1 {}\n'.format(name, d['beg'], delta ,outname)) # slow but more accurate
+
+            # very fast, but there are still 'bugs'
+            true_beg = l_clamps(d['beg'], back)
+            d1 = d['beg'] - true_beg # we have to do it this way just in case the clip is at the beginning
+            file_rip.write('ffmpeg -ss 0{} -i ../original/{}.mp4 -ss 0{} -t 0{} -c copy {}\n'.format(true_beg, name, d1, delta, outname)) # fast but not accurate
 
 
+# generates a debug file which shows how the clips are laid out
+# TODO: at the moment it doesn't work correctly - fix that
 def write_debug_file(data):
     file_debug = open('debug.txt', 'w')
     for d in data:
@@ -69,6 +86,7 @@ def write_debug_file(data):
 if __name__ == "__main__":
     search = '(pesky bird)' # the searched phrase
     sides = timedelta(seconds=10) # time we cut on both sides
+    back  = timedelta(seconds=60) # how far back we want to go to get better keyframes
     path = os.getcwd() + '/../subs/' # subtitles location
 
     file_names = sorted(os.listdir(path))