Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 77d12eb6ce | |||
| 982ecd9d4b | |||
| f45a266767 | |||
| f43bda315d | |||
| 921dfd3841 | |||
| 2736959e0b | |||
| aa41d474d6 |
8
.gitignore
vendored
Normal file → Executable file
8
.gitignore
vendored
Normal file → Executable file
@ -1,4 +1,10 @@
|
||||
out/
|
||||
subs/
|
||||
list.txt
|
||||
rip.sh
|
||||
rip*
|
||||
debug.txt
|
||||
output.mp4
|
||||
.vscode
|
||||
sumout/
|
||||
out*
|
||||
__pycache__
|
||||
2
README.md
Normal file → Executable file
2
README.md
Normal file → Executable file
@ -1,4 +1,4 @@
|
||||
# Rough Editor
|
||||
# The README is not accurate at the moment!
|
||||
|
||||
## Description
|
||||
|
||||
|
||||
6
clean.sh
6
clean.sh
@ -1,6 +0,0 @@
|
||||
#!/bin/bash

# Removes the artifacts of a previous run and recreates an empty ../out
# directory. All output of the cleanup commands is discarded, so missing
# files or directories are not reported.
{
    rm list.txt rip.sh output.mp4
    rm -r ../out
    mkdir ../out
} > /dev/null 2>&1

echo "clean.sh finished its job"
|
||||
13
convert.sh
13
convert.sh
@ -1,13 +0,0 @@
|
||||
#!/bin/bash

# Converts the subtitles in ../subs from .vtt to .srt using ffmpeg.
# A .vtt file is removed only after its conversion succeeded, so a failed
# conversion never loses the source subtitle.

# With no .vtt files present the loop must run zero times instead of
# receiving the literal pattern.
shopt -s nullglob

for src in ../subs/*.vtt; do
    dst="${src%.*}.srt"
    # Quoted so that file names containing spaces or glob characters work.
    if ffmpeg -i "$src" "$dst"; then
        rm -- "$src"
    else
        echo "convert.sh: could not convert $src, keeping it" >&2
    fi
done

echo "convert.sh finished its job"
|
||||
117
gen-script.py
117
gen-script.py
@ -1,117 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# add subtitles download support and conversion
|
||||
# support different subtitles extensions
|
||||
# support subs and vids renaming
|
||||
# add debug file, I removed the old one because it wasnt working correctly
|
||||
|
||||
# check if you tell ffmpeg after the video is over - how does it handle it?
|
||||
# if yes, add r_clamp(.). I would have to get the length of the videos
|
||||
|
||||
# handle the error when there are no fragments / there's only one in function merge_overlap
|
||||
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from datetime import timedelta
|
||||
|
||||
# it's used to make sure that eg. 00:00:02 doesn't become 23:59:52
|
||||
def left_clamps(t, delta):
    """Return ``t - delta``, but never less than a zero-length timedelta."""
    return t - delta if t >= delta else timedelta()
|
||||
|
||||
# if two chunks overlap, they get merged
|
||||
def merge_overlap(data, margin=None):
    """Merge neighbouring fragments of the same file that overlap, in place.

    When a fragment overlaps the one before it, the later fragment absorbs
    the earlier one (start time, end time and description) and the earlier
    slot is replaced by an empty dict, which keeps the indices of all other
    fragments unchanged.

    Args:
        data: list of dicts with the keys 'fname', 'beg', 'end' and 'desc'.
        margin: extra timedelta tolerance used when testing for overlap.
            Defaults to the module-level ``sides`` value.

    An empty list or a single fragment is left untouched.
    """
    if margin is None:
        margin = sides  # module-level setting assigned by the script entry point

    # Start without a predecessor: seeding the loop with data[-1] would
    # compare the first fragment with the last one (or with itself).
    prev = None
    for i, cur in enumerate(data):
        overlaps = (
            prev is not None
            and prev['fname'] == cur['fname']
            and prev['end'] > cur['beg'] - margin
        )
        if overlaps:
            cur['beg'] = prev['beg']
            # Keep the later end in case the previous fragment runs longer.
            cur['end'] = max(prev['end'], cur['end'])
            cur['desc'] = prev['desc'] + ' | ' + cur['desc']
            data[i - 1] = {}
        prev = cur
|
||||
|
||||
# it collects data of clips which contain searched phrase
|
||||
def generate_splice_data(file_names):
    """Collect the subtitle cues whose text matches the searched phrase.

    Reads every file in ``file_names`` from the module-level ``path``
    directory. A line that starts with a cue timing
    ("HH:MM:SS.mmm --> HH:MM:SS.mmm") is remembered; every following text
    line that matches the module-level ``search`` regex produces one entry.

    Args:
        file_names: subtitle file names relative to ``path``.

    Returns:
        list of dicts with the keys
        'fname' (file name), 'beg' (cue start minus ``sides``, clamped at
        zero), 'end' (cue end plus ``sides``) and 'desc' (the matching line
        without its trailing newline).
    """
    # Cue timing line; SRT writes the milliseconds after a comma, VTT after a dot.
    timing_line = re.compile(
        r'(\d{2}):(\d{2}):(\d{2})[.,]\d+ --> (\d{2}):(\d{2}):(\d{2})'
    )
    data = []
    for f in file_names:
        timing = None  # no cue seen yet in this file
        with open(path + f) as subs:
            for line in subs:
                found = timing_line.match(line)
                if found:
                    timing = [int(part) for part in found.groups()]
                elif timing is not None and re.search(search, line):
                    # Only text lines are searched, never the timing lines.
                    beg_h, beg_m, beg_s, end_h, end_m, end_s = timing
                    beg = timedelta(hours=beg_h, minutes=beg_m, seconds=beg_s)
                    end = timedelta(hours=end_h, minutes=end_m, seconds=end_s)
                    data.append({
                        'fname': f,                       # file name
                        'beg': left_clamps(beg, sides),   # beginning timestamp
                        'end': end + sides,               # ending timestamp
                        'desc': line.rstrip('\n'),        # said lines
                    })
    return data
|
||||
|
||||
|
||||
# it generates rip.sh - a script which cuts out the necessary parts
|
||||
# i dunno how to spell nessessery
|
||||
def write_list_rip(data, mode):
    """Write ``list.txt`` and ``rip.sh`` for the given fragments.

    ``rip.sh`` receives one ffmpeg command per non-empty fragment and
    ``list.txt`` receives one ``file '<clip>'`` line per non-empty fragment.
    Both files are created in the current working directory and are closed
    before the function returns, so ``rip.sh`` is complete on disk when the
    caller executes it.

    Args:
        data: list of fragment dicts ('fname', 'beg', 'end', ...); empty
            dicts (fragments merged away) are skipped but still consume
            their index in the output file numbering.
        mode: 'slow' seeks after opening the input and re-encodes;
            'fast' seeks before the input and stream-copies, starting
            up to ``back`` (module-level setting) earlier.
    """
    with open('list.txt', 'w') as file_list, open('rip.sh', 'w') as file_rip:
        file_rip.write('#!/bin/bash\n')
        for i, d in enumerate(data):
            if not d:
                continue

            name = d['fname'].split('.')[0]  # get a name without an extension
            outname = "../out/{}-{:0>2}.mp4".format(name, i)
            file_list.write("file '{}'\n".format(outname))
            delta = d['end'] - d['beg']

            if mode == 'slow':
                # slow but more accurate
                file_rip.write(
                    'ffmpeg -i ../original/{}.mp4 -ss 0{} -t 0{} -async 1 {}\n'
                    .format(name, d['beg'], delta, outname)
                )
            elif mode == 'fast':
                true_beg = left_clamps(d['beg'], back)
                # Computed as a difference in case the clip is at the very
                # beginning and true_beg was clamped to zero.
                d1 = d['beg'] - true_beg
                # fast but not accurate
                file_rip.write(
                    'ffmpeg -ss 0{} -i ../original/{}.mp4 -ss 0{} -t 0{} -c copy {}\n'
                    .format(true_beg, name, d1, delta, outname)
                )
|
||||
|
||||
|
||||
def handle_arguments():
    """Validate ``sys.argv`` and return the script settings.

    Expected command line: ``<searched phrase> <slow|fast> [sides]``.

    Returns:
        dict with the keys 'search' (the phrase), 'mode' ('slow' or 'fast')
        and 'sides' (int number of seconds; 10 when omitted).

    On invalid input an error is printed and the process exits with
    status 1.
    """
    if len(sys.argv) not in (3, 4):
        print('Error: incorrect number of arguments.')
        print('Arguments: (searched phrase) (slow/fast) (sides, e.g. 5 or nothing)')
        sys.exit(1)

    if sys.argv[2] not in ('slow', 'fast'):
        print('Error: second argument can be only one of the following: slow, fast')
        sys.exit(1)

    arguments = {'search': sys.argv[1], 'mode': sys.argv[2], 'sides': 10}

    if len(sys.argv) == 4:
        try:
            arguments['sides'] = int(sys.argv[3])
        except ValueError:
            # Only a failed conversion is an input error; anything else
            # should surface as a real traceback.
            print('Error: third argument is not a number')
            sys.exit(1)

    return arguments
|
||||
|
||||
if __name__ == "__main__":
    arguments = handle_arguments()

    # The names below are module-level settings that the helper functions
    # read as globals, so they must stay assigned here under these names.
    search = arguments['search']                   # the searched phrase
    sides = timedelta(seconds=arguments['sides'])  # time we cut on both sides
    back = timedelta(seconds=60)                   # extra lead-in used by 'fast' mode seeking
    path = f'{os.getcwd()}/../subs/'               # subtitles location

    # Get rid of the files left over from the previous run.
    os.chmod('clean.sh', 0o755)
    subprocess.call('./clean.sh', shell=True)

    file_names = os.listdir(path)
    file_names.sort()

    data = generate_splice_data(file_names)
    merge_overlap(data)
    write_list_rip(data, arguments['mode'])

    # Run the freshly generated cutting script.
    os.chmod('rip.sh', 0o755)
    subprocess.call('./rip.sh', shell=True)
|
||||
36
regex-test.py
Executable file
36
regex-test.py
Executable file
@ -0,0 +1,36 @@
|
||||
import re
|
||||
import os
|
||||
|
||||
from rough_edit.utils import str_to_timedelta, escape_string, generate_regex
|
||||
from rough_edit.file_writers import ffmpeg_command, mpv_command
|
||||
from rough_edit.handle_arguments import handle_arguments
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Imported here because the module header does not import timedelta.
    from datetime import timedelta

    baseDir = '../'
    subs_dir = f"{baseDir}subs/"
    outDir = f"{baseDir}out/"

    phrase, padding_left, padding_right = handle_arguments()
    regex = generate_regex(phrase)

    count = 0

    # Generates rip.sh: a cleanup of the output directory followed by a
    # single mpv invocation with one per-file option group per match.
    with open('rip.sh', 'w') as rip_file:
        rip_file.write(f'rm {outDir}/*\nmpv\\\n')
        # Sorted so that the generated script is the same on every run;
        # os.listdir returns entries in arbitrary order.
        for filename in sorted(os.listdir(subs_dir)):
            subs_file = subs_dir + filename
            with open(subs_file) as sample:
                results = re.findall(regex, sample.read())

            # NOTE(review): assumes file names start with a 3-character
            # episode number and end with a 7-character suffix - confirm.
            episode_num = filename[:3]
            base_name = filename[3:-7]

            for result in results:
                print(filename)
                print(result)
                # Clamp at zero: a negative timedelta renders as
                # "-1 day, 23:59:58", which breaks the generated command.
                beg = max(str_to_timedelta(result[0]) - padding_left, timedelta())
                end = str_to_timedelta(result[-1]) + padding_right
                # rip_file.write(ffmpeg_command(episode_num, beg, end, count, outDir, base_name))
                rip_file.write(mpv_command(episode_num, beg, end))
                count += 1
        rip_file.write('\techo Finished')
|
||||
13
rough_edit/file_writers.py
Executable file
13
rough_edit/file_writers.py
Executable file
@ -0,0 +1,13 @@
|
||||
from rough_edit.utils import escape_string
|
||||
|
||||
|
||||
def mpv_command(episode_num, beg, end):
    """Return one per-file option group of an mpv command line.

    The group has the form ``--\\{ <file glob> --start=.. --end=.. --\\}``
    followed by a backslash and a newline, so consecutive groups continue
    the same shell command.

    Args:
        episode_num: prefix used to glob the video in ``../original/``.
        beg: playback start, formatted with ``str()``.
        end: playback end, formatted with ``str()``.
    """
    # Literal braces are written as {{ and }}; substituting placeholder
    # characters afterwards would also rewrite those characters inside the
    # interpolated values.
    return f' --\\{{ ../original/{episode_num}* --start={beg} --end={end} --\\}}\\\n'
|
||||
|
||||
|
||||
def ffmpeg_command(episode_num, beg, end, count, outDir, base_name):
    """Return a shell snippet that cuts one clip out of an episode with ffmpeg.

    The snippet is a blank line, a ``# <base_name>`` comment line and the
    ffmpeg command line, each terminated by a newline. The output file is
    named ``<outDir><count zero-padded to 6 digits><base_name>.mp4``.
    """
    duration = end - beg
    target = f'"{outDir}{count:06}{base_name}.mp4"'
    pieces = [
        '',
        f'# {base_name}',
        f'ffmpeg -i ../original/{episode_num}* -ss {beg} -t {duration} -async 1 {target} -y',
        '',
    ]
    return '\n'.join(pieces)
|
||||
23
rough_edit/handle_arguments.py
Executable file
23
rough_edit/handle_arguments.py
Executable file
@ -0,0 +1,23 @@
|
||||
import sys
|
||||
from datetime import timedelta
|
||||
|
||||
def print_expected_call_message(additional_message):
    """Print ``additional_message`` followed by the usage help of the script.

    Args:
        additional_message: first line of the output, normally the reason
            why the help is being shown.
    """
    # The script is named regex-test.py; the help text must show the name
    # the user actually has to type.
    print(f"""{additional_message}
Expected application call:
python3 regex-test.py [searched phrase] [left_padding] [right_padding]
Example call:
python3 regex-test.py "I don't know" 2 3""")
|
||||
|
||||
|
||||
|
||||
def handle_arguments():
    """Parse ``sys.argv`` into the searched phrase and the two paddings.

    Expected command line: ``<phrase> <left_padding> <right_padding>``,
    with both paddings given as whole numbers of seconds.

    Returns:
        list ``[phrase, padding_left, padding_right]`` where the paddings
        are ``timedelta`` objects.

    On a wrong argument count or a non-integer padding the usage help is
    printed and the process exits with status 1.
    """
    if (arg_len := len(sys.argv)) != 4:
        print_expected_call_message(f'Expected three arguments, got {arg_len-1}.')
        sys.exit(1)

    phrase = sys.argv[1]
    try:
        # seconds= must be named: the first positional parameter of
        # timedelta is days.
        padding_left, padding_right = [
            timedelta(seconds=int(number)) for number in sys.argv[2:4]
        ]
    except ValueError:
        print_expected_call_message('Paddings must be whole numbers of seconds.')
        sys.exit(1)

    return [phrase, padding_left, padding_right]
|
||||
27
rough_edit/normalize_subs.py
Executable file
27
rough_edit/normalize_subs.py
Executable file
@ -0,0 +1,27 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
|
||||
# Lines matching any of these patterns are dropped from the subtitles.
# Raw strings keep the regex escapes (\s, \-, \[) out of Python's own
# string-escape processing.
_DROPPED_LINE_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'^WEBVTT$',
    r'^Kind: captions$',
    r'^Language:.*$',
    r'^.*align:start position:0%.*$',
    r"^[a-zA-Z'\s\-\[\]]{3,}$",
))


def check_conditions(line):
    """Tell whether ``line`` should be kept.

    Returns:
        False when the line matches one of the dropped-line patterns (file
        header lines, cue lines carrying ``align:start position:0%``, and
        lines of three or more characters made only of letters, whitespace,
        apostrophes, hyphens and square brackets); True otherwise.
    """
    return not any(pattern.match(line) for pattern in _DROPPED_LINE_PATTERNS)
|
||||
|
||||
|
||||
# Copy every subtitle file from ../../subs to ../../out-subs, keeping only
# the lines accepted by check_conditions.
for file_name in os.listdir('../../subs'):
    print(file_name)
    with open(f'../../subs/{file_name}', 'r') as infile, \
            open(f'../../out-subs/{file_name}', 'w') as outfile:
        for line in infile:
            if not check_conditions(line):
                continue
            outfile.write(line)
            # .replace('\n', ''))
|
||||
33
rough_edit/stamper.py
Executable file
33
rough_edit/stamper.py
Executable file
@ -0,0 +1,33 @@
|
||||
from datetime import timedelta
|
||||
|
||||
|
||||
def string_to_timedelta(time):
    """Convert an ``H:M:S`` string with integer fields into a timedelta."""
    parts = time.split(':')
    fields = {
        'hours': int(parts[0]),
        'minutes': int(parts[1]),
        'seconds': int(parts[2]),
    }
    return timedelta(**fields)
|
||||
|
||||
|
||||
def split_list(alist, wanted_parts=1):
    """Split ``alist`` into ``wanted_parts`` consecutive slices.

    The slice boundaries are ``i * len(alist) // wanted_parts``, so the
    parts differ in length by at most one element.
    """
    total = len(alist)
    chunks = []
    for part in range(wanted_parts):
        lower = part * total // wanted_parts
        upper = (part + 1) * total // wanted_parts
        chunks.append(alist[lower:upper])
    return chunks
|
||||
|
||||
|
||||
# Read rip2.sh (dropping the piece after the final newline) and split its
# lines into 153 groups.
# NOTE(review): each group is indexed as [label, time, time] below, so the
# file presumably holds 153 three-line records - confirm.
with open('rip2.sh') as file:
    data = split_list(file.read().split('\n')[:-1], 153)
    out = []

# Turn the two time fields of every group into timedeltas.
for d in data:
    print(d)
    d[1] = string_to_timedelta(d[1])
    d[2] = string_to_timedelta(d[2])

# Write one "<running total> <label> at <time>" line per group; the running
# total is the sum of the third field of all preceding groups.
cur = timedelta(seconds=0)
with open('rip3.sh', 'w') as outfile:
    for d in data:
        outfile.write(f'{cur} ' + d[0] + f' at {d[1]}\n')
        cur += d[2]
|
||||
36
rough_edit/utils.py
Executable file
36
rough_edit/utils.py
Executable file
@ -0,0 +1,36 @@
|
||||
import re
|
||||
from datetime import timedelta
|
||||
import os
|
||||
|
||||
|
||||
def escape_string(string):
    """Backslash-escape spaces, quotes, parentheses and ampersands.

    Other characters (notably ``*``) are left untouched, so a glob in the
    string keeps working.
    """
    escapes = str.maketrans({sign: '\\' + sign for sign in ' \'"()&'})
    return string.translate(escapes)
|
||||
|
||||
|
||||
def str_to_timedelta(string):
    """Convert the first ``HH:MM:SS.mmm`` timestamp in ``string`` to a timedelta.

    Args:
        string: text containing a timestamp such as ``00:01:02.500``.

    Returns:
        The timestamp as a ``timedelta``.

    Raises:
        ValueError: if ``string`` contains no such timestamp.
    """
    found = re.search(r"(\d{2}):(\d{2}):(\d{2})\.(\d{3})", string)
    if found is None:
        raise ValueError(f'no HH:MM:SS.mmm timestamp found in {string!r}')
    hours, minutes, seconds, milliseconds = (int(part) for part in found.groups())
    return timedelta(hours=hours,
                     minutes=minutes,
                     seconds=seconds,
                     milliseconds=milliseconds)
|
||||
|
||||
|
||||
def generate_regex(prase):
    """Build a regex that finds a phrase in word-timed subtitle text.

    Every word of the phrase must appear as ``<time><c> word</c>`` and the
    last word must be followed by one more ``<time>`` tag. Each timestamp
    and each word is a capture group, so a match yields
    ``(time, word, time, word, ..., time)``.

    Args:
        prase: the searched phrase; words are separated by whitespace and
            are inserted into the pattern unescaped.

    Returns:
        The regex source string (it is also printed).
    """
    # The millisecond separator is escaped so that only HH:MM:SS.mmm
    # matches, the format str_to_timedelta is able to parse.
    time_regex = r"(\d{2}:\d{2}:\d{2}\.\d{3})"

    # split() without an argument ignores repeated spaces instead of
    # producing empty words.
    words = prase.split()
    out_regex = "".join(f"<{time_regex}><c> ({word})</c>" for word in words)
    out_regex += f"<{time_regex}>"

    print(out_regex)
    return out_regex
|
||||
# regex = f"<{timeRegex}><c> ({text})</c><{timeRegex}><c> ({text})</c><{timeRegex}>"
|
||||
Reference in New Issue
Block a user