Implemented proper argv handling

Variables to snake_case
Deleted old files
2020-10-27 01:31:43 +01:00 · 2020-10-27 00:44:03 +01:00 · 2020-10-27 00:32:01 +01:00 · 2020-10-09 16:06:14 +02:00 · 2020-10-08 20:56:01 +02:00 · 2020-09-24 01:09:09 +02:00
15 changed files with 178 additions and 157 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,10 @@
+out/
+subs/
 list.txt
-rip.sh
+rip*
 debug.txt
 output.mp4
+.vscode
+sumout/
+out*
+__pycache__
--- a/0
+++ b/0
--- a/README.md
+++ b/README.md
@ -1,4 +1,4 @@
-# Rough Editor
+# The README is not accurate at the moment!

 ## Description

--- a/clean.sh
+++ b/clean.sh
@ -1,6 +0,0 @@
-#!/bin/bash
-
-rm list.txt rip.sh output.mp4 > /dev/null 2>&1
-rm -r ../out > /dev/null 2>&1
-mkdir ../out > /dev/null 2>&1
-echo "clean.sh finished its job"
--- a/convert.sh
+++ b/convert.sh
@ -1,13 +0,0 @@
-#!/bin/bash
-
-# it converts subs from .vtt to .srt
-
-for filename in ../subs/*; do
-	file="${filename%.*}";
-	in="${file}.vtt"
-	out="${file}.srt"
-	ffmpeg -i $in $out
-	done
-
-rm ../subs/*vtt	
-echo "convert.sh finished its job"
--- a/delete_vidoes_without_sub.sh
+++ b/delete_vidoes_without_sub.sh
@ -1,13 +0,0 @@
-#!/bin/sh
-cd ../original
-for f in *; do
-	if [ ! -f "../subs/${f%.*}.de.vtt" ]; then
-		rm "$f"
-	fi
-done
-cd ../subs/
-for f in *; do
-	if [ ! -f "../original/${f%.*}.mp4" ]; then
-		rm "$f"
-	fi
-done
--- a/download.sh
+++ b/download.sh
@ -5,8 +5,8 @@ mkdir ../original

 #download subs
 cd ../subs
-youtube-dl --write-auto-sub --sub-lang de --yes-playlist -i --skip-download -o '%(playlist_index)s.%(ext)s' $1
+youtube-dl --write-auto-sub --yes-playlist --skip-download -o '%(playlist_index)s.%(ext)s' $1

 # download videos
 cd ../original
-youtube-dl -f 22 --yes-playlist -i -o '%(playlist_index)s.%(ext)s' $1
+youtube-dl -f 22 --yes-playlist -o '%(playlist_index)s.%(ext)s' $1
--- a/gen-script.py
+++ b/gen-script.py
@ -1,117 +0,0 @@
-#!/usr/bin/python
-
-# add subtitles download support and conversion
-# support different subtitles extensions
-# support subs and vids renaming
-# add debug file, I removed the old one because it wasnt working correctly
-
-# check if you tell ffmpeg after the video is over - how does it handle it?
-# if yes, add r_clamp(.). I would have to get the length of the videos
-
-# handler error whene there are no fragments/ there's only one in function merge_overlap
-
-import re
-import os
-import sys
-import subprocess
-from datetime import timedelta
-
-# it's used to make sure that eg. 00:00:02 doesn't become 23:59:52
-def left_clamps(t, delta):
-    if t < delta:
-        return timedelta()
-    else:
-        return t - delta
-
-# if two chunks overlap, they get merged
-def merge_overlap(data):
-    prev=data[-1] # probably there's a better way to start this loop
-    for i, cur in enumerate(data):
-        if prev['fname']==cur['fname'] and prev['end'] > cur['beg'] - sides: # that means we have to merge them
-            cur['beg']=prev['beg']
-            cur['desc']=prev['desc']+' | ' +cur['desc']
-            data[i-1]={}
-        prev = cur
-
-# it collects data of clips which contain searched phrase
-def generate_splice_data(file_names):
-    data = []
-    for i, f in enumerate(file_names):
-        file = open(path + f)
-        for i, line in enumerate(file):
-            if line[0:2] == '00':
-                prev = line # a timestamp line
-            elif re.search(search, line): # we only need to check lines which contain text, hence the first condition
-                data.append({
-                'fname':f,                                                                                  #file name
-                'beg':left_clamps(timedelta(minutes = int(prev[3  :5]), seconds=int(prev[6:  8])), sides),  #beginning timestamp
-                'end':timedelta(minutes = int(prev[20:22]), seconds=int(prev[23:25])) + sides,              #ending timestamp
-                'desc':line[:-1]})                                                                          #said lines
-    return data
-
-
-# it generates rip.sh - script which cut nessecsary part
-# i dunno how to spell nessessery
-def write_list_rip(data, mode):
-    file_list = open('list.txt', 'w')
-    file_rip = open('rip.sh', 'w')
-    file_rip.write('#!/bin/bash\n')
-    for (i, d) in enumerate(data):
-        if (d):
-            name = d['fname'].split('.')[0] # get a name without an extension
-            outname ="../out/{}-{:0>2}.mp4".format(name, i)
-
-            file_list.write("file '{}'\n".format(outname))
-            delta = d['end']-d['beg']
-
-            if mode == 'slow':
-                file_rip.write('ffmpeg -i ../original/{}.mp4 -ss 0{} -t 0{} -async 1 {}\n'.format(name, d['beg'], delta ,outname)) # slow but more accurate
-
-            elif mode =='fast':
-                true_beg = left_clamps(d['beg'], back)
-                d1 = d['beg'] - true_beg # we have to do it his was just in case the clip is at the beginning
-                file_rip.write('ffmpeg -ss 0{} -i ../original/{}.mp4 -ss 0{} -t 0{} -c copy {}\n'.format(true_beg, name, d1, delta, outname)) # fast but not accurate
-
-
-def handle_arguments():
-    if not len(sys.argv) in [3, 4]:
-        print('Error: incorrect number of arguments.')
-        print('Arguments: (searched phrase} (slow/fast) (sides, e.g. 5 or nothing)')
-        sys.exit()
-
-    if not sys.argv[2] in ['slow', 'fast']:
-        print('Error: second argument can be only one of the following: slow, fast')
-        sys.exit()
-
-    arguments = {'search': sys.argv[1], 'mode': sys.argv[2]}
-    
-    if (len(sys.argv)== 4):
-        try:
-            arguments.update({'sides': int(sys.argv[3])})
-        except:
-            print('Error: third argument is not a number')
-            sys.exit()
-    else:
-        arguments.update({'sides': 10})
-        
-
-    return arguments 
-
-if __name__ == "__main__":
-    arguments = handle_arguments()
-    search = arguments['search'] # the searched phrase
-
-    os.chmod('clean.sh', 0o755)
-    subprocess.call('./clean.sh', shell=True) # get rid of the old files
-
-    sides = timedelta(seconds=arguments['sides']) # time we cut on both sides
-    back  = timedelta(seconds=60) # how much do we want to go to get better keyframes. dunno how to explain this in two sentences
-    path = os.getcwd() + '/../subs/' # subtitles location
-
-    file_names = sorted(os.listdir(path))
-    data = generate_splice_data(file_names)
-    merge_overlap(data)
-    write_list_rip(data, arguments['mode'])
-
-    os.chmod('rip.sh', 0o755)
-    subprocess.call('./rip.sh', shell=True)
--- a/regex-test.py
+++ b/regex-test.py
@ -0,0 +1,36 @@
+import re
+import os
+
+from rough_edit.utils import str_to_timedelta, escape_string, generate_regex
+from rough_edit.file_writers import ffmpeg_command, mpv_command
+from rough_edit.handle_arguments import handle_arguments
+
+
+if __name__ == '__main__':
+    baseDir = '../'
+    subs_dir = f"{baseDir}subs/"
+    outDir = f"{baseDir}out/"
+
+    phrase, padding_left, padding_right = handle_arguments()
+    regex = generate_regex(phrase)
+
+    count = 0
+
+    with open('rip.sh', 'w') as rip_file:
+        rip_file.write(f'rm {outDir}/*\nmpv\\\n')
+        for filename in os.listdir(subs_dir):
+            subs_file = subs_dir + filename
+            with open(subs_file) as sample:
+                results = re.findall(regex, sample.read())
+                episode_num = filename[:3]
+                base_name = filename[3:-7]
+
+                for result in results:
+                    print(filename)
+                    print(result)
+                    beg = str_to_timedelta(result[0]) - padding_left
+                    end = str_to_timedelta(result[-1]) + padding_right
+                    # rip_file.write(ffmpeg_command(episode_num, beg, end, count, outDir, base_name))
+                    rip_file.write(mpv_command(episode_num, beg, end))
+                    count += 1
+        rip_file.write('\techo Finished')
--- a/render_video.sh
+++ b/render_video.sh
@ -1,4 +0,0 @@
-#!/bin/sh
-cd ../out
-find *.mp4 | sed 's:\ :\ :g'| sed 's/^/file /' > list.txt
-ffmpeg -f concat -safe 0 -segment_time_metadata 1 -i list.txt -vf select=concatdec_select -af aselect=concatdec_select,aresample=async=1 final.mp4
--- a/rough_edit/file_writers.py
+++ b/rough_edit/file_writers.py
@ -0,0 +1,13 @@
+from rough_edit.utils import escape_string
+
+
+def mpv_command(episode_num, beg, end):
+    return f'  --\\` ../original/{episode_num}* --start={beg} --end={end} --\\~\\\n'.replace('`', '{').replace('~', '}')
+
+
+def ffmpeg_command(episode_num, beg, end, count, outDir, base_name):
+    padded_count = f'{count:06}'
+    return f"""
+# {base_name}
+ffmpeg -i ../original/{episode_num}* -ss {beg} -t {end-beg} -async 1 "{outDir}{padded_count}{base_name}.mp4" -y
+"""
--- a/rough_edit/handle_arguments.py
+++ b/rough_edit/handle_arguments.py
@ -0,0 +1,23 @@
+import sys
+from datetime import timedelta
+
+def print_expected_call_message(additional_message):
+    print(f"""{additional_message}
+Expected application call:
+python3 regex_text.py [searched phrase] [left_padding] [right_padding]
+Example call:
+python3 regex_text.py "I don't know" 2 3""")
+
+
+
+def handle_arguments():
+    if not (arg_len := len(sys.argv)) == 4:
+        print_expected_call_message(f'Expected two arguments, got {arg_len-1}.')
+        exit()
+    try:
+        phrase = sys.argv[1]
+        padding_left, padding_right = [timedelta(int(number)) for number in sys.argv[2:4]]
+        return([phrase, padding_left, padding_right])
+    except:
+        print_expected_call_message(f'An error has occured.')
+        exit()
--- a/rough_edit/normalize_subs.py
+++ b/rough_edit/normalize_subs.py
@ -0,0 +1,27 @@
+import os
+import re
+
+
+def check_conditions(line):
+    # Returns False if the line matches any of the regexes below, True otherwise.
+    conditions = [
+        '^WEBVTT$',
+        '^Kind: captions$',
+        '^Language:.*$',
+        '^.*align:start position:0%.*$',
+        "^[a-zA-Z'\s\-\[\]]{3,}$"
+    ]
+    for condition in conditions:
+        if re.match(condition, line):
+            return False
+    return True
+
+
+for file_name in os.listdir('../../subs'):
+    print(file_name)
+    with open(f'../../subs/{file_name}', 'r') as infile:
+        with open(f'../../out-subs/{file_name}', 'w') as outfile:
+            for line in infile:
+                if check_conditions(line):
+                    outfile.write(line)
+                    # .replace('\n', ''))
--- a/rough_edit/stamper.py
+++ b/rough_edit/stamper.py
@ -0,0 +1,33 @@
+from datetime import timedelta
+
+
+def string_to_timedelta(time):
+    time = time.split(':')
+    return timedelta(hours=int(time[0]), minutes=int(time[1]), seconds=int(time[2]))
+
+
+def split_list(alist, wanted_parts=1):
+    length = len(alist)
+    return [alist[i*length // wanted_parts: (i+1)*length // wanted_parts] for i in range(wanted_parts)]
+
+
+with open('rip2.sh') as file:
+    data = file.read().split('\n')[:-1]
+    data = split_list(data, 153)
+    out = []
+
+    for d in data:
+        print(d)
+        d[1] = string_to_timedelta(d[1])
+        d[2] = string_to_timedelta(d[2])
+
+    cur = timedelta(seconds=0)
+    with open('rip3.sh', 'w') as outfile:
+        for d in data:
+            outfile.write(str(cur))
+            outfile.write(' ')
+            outfile.write(d[0])
+            outfile.write(' at ')
+            outfile.write(str(d[1]))
+            outfile.write('\n')
+            cur += d[2]
--- a/rough_edit/utils.py
+++ b/rough_edit/utils.py
@ -0,0 +1,36 @@
+import re
+from datetime import timedelta
+import os
+
+
+def escape_string(string):
+    # Escapes characters like ' and & by prefixing each one with a backslash.
+    # These are the characters I have to escape to make the * work.
+    for sign in [' ', "'", '"', '(', ')', '&']:
+        string = string.replace(sign, '\\'+sign)
+    return string
+
+
+def str_to_timedelta(string):
+    # TODO later: change findall to get rid of those [0]s
+    time_regex_format = r"(\d{2}):(\d{2}):(\d{2})\.(\d{3})"
+    results = re.findall(time_regex_format, string)
+    hours, minutes, seconds, milliseconds = results[0]
+    return timedelta(hours=int(hours),
+                     minutes=int(minutes),
+                     seconds=int(seconds),
+                     milliseconds=int(milliseconds))
+
+
+def generate_regex(prase):
+    phrase_array = prase.split(" ")
+    time_regex = r"(\d{2}:\d{2}:\d{2}.\d{3})"
+    out_regex = ""
+
+    for word in phrase_array:
+        out_regex += f"<{time_regex}><c> ({word})</c>"
+    out_regex += f"<{time_regex}>"
+
+    print(out_regex)
+    return out_regex
+    # regex = f"<{timeRegex}><c> ({text})</c><{timeRegex}><c> ({text})</c><{timeRegex}>"
Author	SHA1	Message	Date
Paweł Kołaczyński	77d12eb6ce	Implemented proper argv handling	2020-10-27 01:31:43 +01:00
Paweł Kołaczyński	982ecd9d4b	Variables to snake_case	2020-10-27 00:44:03 +01:00
Paweł Kołaczyński	f45a266767	Deleted old files	2020-10-27 00:32:01 +01:00
Pawel Kolaczynski	f43bda315d	added stamper	2020-10-09 16:06:14 +02:00
Pawel Kolaczynski	921dfd3841	regex-test split into modules, mpvCommand works	2020-10-08 20:56:01 +02:00
Pawel Kolaczynski	2736959e0b	rewrited the core to regex-rest	2020-09-24 01:09:09 +02:00
Pawel Kolaczynski	aa41d474d6	rewriting the whole thing	2020-09-21 14:54:56 +02:00