Compare commits

7 Commits

15 changed files with 178 additions and 157 deletions

8
.gitignore vendored Normal file → Executable file
View File

@ -1,4 +1,10 @@
out/
subs/
list.txt
rip.sh
rip*
debug.txt
output.mp4
.vscode
sumout/
out*
__pycache__

0
LICENSE Normal file → Executable file
View File

2
README.md Normal file → Executable file
View File

@ -1,4 +1,4 @@
# Rough Editor
# The README is not accurate at the moment!
## Description

View File

@ -1,6 +0,0 @@
#!/bin/bash
# Reset the state left behind by a previous run.
# Every command's output (stdout and stderr) is discarded, so the script
# stays quiet when a file or directory is already missing.

# Remove the generated list.txt / rip.sh and any output.mp4 in the current directory.
rm list.txt rip.sh output.mp4 > /dev/null 2>&1
# Recreate ../out as an empty directory.
rm -r ../out > /dev/null 2>&1
mkdir ../out > /dev/null 2>&1
echo "clean.sh finished its job"

View File

@ -1,13 +0,0 @@
#!/bin/bash
# Convert every WebVTT subtitle in ../subs to SubRip (.srt) with ffmpeg.
# A .vtt file is deleted only after its own conversion succeeded, so a failed
# ffmpeg run never costs the source subtitle.

# Without nullglob an empty directory would hand the literal pattern to the loop.
shopt -s nullglob

for vtt in ../subs/*.vtt; do
    srt="${vtt%.*}.srt"
    # Quote both paths so names containing spaces stay a single argument.
    if ffmpeg -i "$vtt" "$srt"; then
        rm -- "$vtt"
    else
        echo "convert.sh: could not convert $vtt" >&2
    fi
done
echo "convert.sh finished its job"

View File

@ -1,13 +0,0 @@
#!/bin/sh
# Keep ../original and ../subs in sync:
#   1. delete every video that has no "<name>.de.vtt" subtitle,
#   2. delete every subtitle that has no "<name>.mp4" video.

# Abort if a directory is missing; otherwise rm would run in the wrong place.
cd ../original || exit 1
for f in *; do
    [ -e "$f" ] || continue   # unmatched glob: the directory is empty
    if [ ! -f "../subs/${f%.*}.de.vtt" ]; then
        rm -- "$f"
    fi
done

cd ../subs/ || exit 1
for f in *; do
    [ -e "$f" ] || continue
    # Subtitles are named "<name>.de.vtt": strip everything from the FIRST dot
    # (%%) to recover "<name>". A single % would leave "<name>.de" and the
    # lookup for "<name>.de.mp4" could never succeed.
    if [ ! -f "../original/${f%%.*}.mp4" ]; then
        rm -- "$f"
    fi
done

View File

@ -5,8 +5,8 @@ mkdir ../original
#download subs
cd ../subs
youtube-dl --write-auto-sub --sub-lang de --yes-playlist -i --skip-download -o '%(playlist_index)s.%(ext)s' $1
youtube-dl --write-auto-sub --yes-playlist --skip-download -o '%(playlist_index)s.%(ext)s' $1
# download videos
cd ../original
youtube-dl -f 22 --yes-playlist -i -o '%(playlist_index)s.%(ext)s' $1
youtube-dl -f 22 --yes-playlist -o '%(playlist_index)s.%(ext)s' $1

View File

@ -1,117 +0,0 @@
#!/usr/bin/python
# add subtitles download support and conversion
# support different subtitles extensions
# support subs and vids renaming
# add debug file, I removed the old one because it wasnt working correctly
# check if you tell ffmpeg after the video is over - how does it handle it?
# if yes, add r_clamp(.). I would have to get the length of the videos
# handle the error when there are no fragments, or only one, in merge_overlap
import re
import os
import sys
import subprocess
from datetime import timedelta
def left_clamps(t, delta):
    """Return ``t - delta``, but never less than a zero-length timedelta.

    Keeps a timestamp near the start of a video (e.g. 00:00:02) from turning
    negative once the padding is subtracted.
    """
    return timedelta() if t < delta else t - delta
def merge_overlap(data, margin=None):
    """Merge neighbouring clips of the same file that overlap, in place.

    ``data`` is the list built by ``generate_splice_data``. When a clip
    starts less than ``margin`` after the previous clip of the same file
    ends, the later clip absorbs the earlier one (start time and description)
    and the earlier slot is replaced by an empty dict, which
    ``write_list_rip`` skips.

    ``margin`` defaults to the module-level ``sides`` padding.

    Only adjacent entries are compared: the first clip is never compared with
    the last one, and an empty or single-element list is left untouched.
    """
    if margin is None:
        margin = sides  # module-level padding set by the __main__ block
    for i in range(1, len(data)):
        prev, cur = data[i - 1], data[i]
        if prev['fname'] == cur['fname'] and prev['end'] > cur['beg'] - margin:
            cur['beg'] = prev['beg']
            # The merged clip has to cover both originals.
            cur['end'] = max(prev['end'], cur['end'])
            cur['desc'] = prev['desc'] + ' | ' + cur['desc']
            data[i - 1] = {}
def generate_splice_data(file_names):
    """Collect one clip description per subtitle line containing the phrase.

    Reads every file in ``file_names`` from the module-level ``path`` and
    matches each text line against the module-level ``search`` pattern.
    A line starting with ``00`` is treated as a timestamp line; only its
    minutes and seconds fields are read.

    Returns a list of dicts with the keys ``fname`` (subtitle file name),
    ``beg`` / ``end`` (timedeltas widened by the module-level ``sides``, the
    start clamped at zero) and ``desc`` (the matching line without its last
    character).
    """
    data = []
    for f in file_names:
        # Reset per file so a match can never reuse the previous file's timestamp.
        timestamp = None
        with open(path + f) as subs_file:
            for line in subs_file:
                if line[0:2] == '00':
                    timestamp = line  # a timestamp line
                elif timestamp is not None and re.search(search, line):
                    data.append({
                        'fname': f,
                        'beg': left_clamps(
                            timedelta(minutes=int(timestamp[3:5]),
                                      seconds=int(timestamp[6:8])),
                            sides),
                        'end': timedelta(minutes=int(timestamp[20:22]),
                                         seconds=int(timestamp[23:25])) + sides,
                        'desc': line[:-1],
                    })
    return data
def write_list_rip(data, mode):
    """Write ``list.txt`` and ``rip.sh`` for the clips in ``data``.

    ``list.txt`` gets one ``file '<clip>'`` line per clip; ``rip.sh`` gets
    one ffmpeg command per clip that cuts it out of ``../original``.
    Empty entries (left behind by ``merge_overlap``) are skipped but still
    count towards the clip index used in the output name.

    ``mode`` selects the ffmpeg invocation:
      * ``'slow'`` - seek after decoding the input; slow but more accurate.
      * ``'fast'`` - seek on the input first (up to the module-level ``back``
        earlier) and stream-copy; fast but not accurate.

    Both files are closed before returning, so ``rip.sh`` is complete on
    disk by the time the caller executes it.
    """
    with open('list.txt', 'w') as file_list, open('rip.sh', 'w') as file_rip:
        file_rip.write('#!/bin/bash\n')
        for i, d in enumerate(data):
            if not d:
                continue
            name = d['fname'].split('.')[0]  # file name without any extension
            outname = "../out/{}-{:0>2}.mp4".format(name, i)
            file_list.write("file '{}'\n".format(outname))
            delta = d['end'] - d['beg']
            if mode == 'slow':
                file_rip.write(
                    'ffmpeg -i ../original/{}.mp4 -ss 0{} -t 0{} -async 1 {}\n'.format(
                        name, d['beg'], delta, outname))
            elif mode == 'fast':
                true_beg = left_clamps(d['beg'], back)
                # Offset inside the pre-sought input; smaller than `back`
                # when the clip sits near the start of the video.
                d1 = d['beg'] - true_beg
                file_rip.write(
                    'ffmpeg -ss 0{} -i ../original/{}.mp4 -ss 0{} -t 0{} -c copy {}\n'.format(
                        true_beg, name, d1, delta, outname))
def handle_arguments():
    """Validate ``sys.argv`` and return the parsed options.

    Expected arguments: ``<searched phrase> <slow|fast> [sides]`` where
    ``sides`` is an integer number of seconds (default 10).

    Returns a dict with the keys ``search``, ``mode`` and ``sides``.
    On invalid input an error is printed and the process exits with
    status 1, so calling scripts can detect the failure.
    """
    if len(sys.argv) not in (3, 4):
        print('Error: incorrect number of arguments.')
        print('Arguments: (searched phrase) (slow/fast) (sides, e.g. 5 or nothing)')
        sys.exit(1)
    if sys.argv[2] not in ('slow', 'fast'):
        print('Error: second argument can be only one of the following: slow, fast')
        sys.exit(1)
    arguments = {'search': sys.argv[1], 'mode': sys.argv[2], 'sides': 10}
    if len(sys.argv) == 4:
        try:
            arguments['sides'] = int(sys.argv[3])
        except ValueError:
            # Only a failed int() conversion is expected here.
            print('Error: third argument is not a number')
            sys.exit(1)
    return arguments
if __name__ == "__main__":
    # --- configuration (read as module-level names by the functions above) ---
    arguments = handle_arguments()
    search = arguments['search']                   # the searched phrase
    sides = timedelta(seconds=arguments['sides'])  # padding cut on both sides
    back = timedelta(seconds=60)                   # extra pre-seek used by 'fast' mode
    path = os.getcwd() + '/../subs/'               # subtitles location

    # --- get rid of the files from the previous run ---
    os.chmod('clean.sh', 0o755)
    subprocess.call('./clean.sh', shell=True)

    # --- find the clips, merge overlapping ones, generate and run rip.sh ---
    file_names = sorted(os.listdir(path))
    data = generate_splice_data(file_names)
    merge_overlap(data)
    write_list_rip(data, arguments['mode'])
    os.chmod('rip.sh', 0o755)
    subprocess.call('./rip.sh', shell=True)

36
regex-test.py Executable file
View File

@ -0,0 +1,36 @@
import re
import os
from rough_edit.utils import str_to_timedelta, escape_string, generate_regex
from rough_edit.file_writers import ffmpeg_command, mpv_command
from rough_edit.handle_arguments import handle_arguments
if __name__ == '__main__':
    # Local import: this script's import block does not provide timedelta.
    from datetime import timedelta

    baseDir = '../'
    subs_dir = f"{baseDir}subs/"
    outDir = f"{baseDir}out/"
    phrase, padding_left, padding_right = handle_arguments()
    regex = generate_regex(phrase)
    count = 0
    with open('rip.sh', 'w') as rip_file:
        # rip.sh empties the output directory, then starts one mpv call whose
        # arguments are appended below, one backslash-continued line per hit.
        rip_file.write(f'rm {outDir}/*\nmpv\\\n')
        # os.listdir() returns names in arbitrary order; sort them so the
        # clips always appear in the same order.
        for filename in sorted(os.listdir(subs_dir)):
            with open(subs_dir + filename) as sample:
                results = re.findall(regex, sample.read())
            episode_num = filename[:3]
            base_name = filename[3:-7]
            for result in results:
                print(filename)
                print(result)
                # Clamp at zero: a negative timedelta is rendered as
                # "-1 day, 23:59:58", which is not a usable --start value.
                beg = max(str_to_timedelta(result[0]) - padding_left, timedelta())
                end = str_to_timedelta(result[-1]) + padding_right
                # Alternative output: cut real files instead of playing them.
                # rip_file.write(ffmpeg_command(episode_num, beg, end, count, outDir, base_name))
                rip_file.write(mpv_command(episode_num, beg, end))
                count += 1
        # NOTE(review): every mpv_command() line ends with a line continuation,
        # so this text is joined onto the mpv command line - confirm intent.
        rip_file.write('\techo Finished')

View File

@ -1,4 +0,0 @@
#!/bin/sh
# Join every .mp4 in ../out into final.mp4 via an ffmpeg concat list.
cd ../out
# Build list.txt with one "file <name>" line per .mp4 in this directory.
# NOTE(review): as written the first sed replaces "\ " with "\ " and looks
# like a no-op; presumably it was meant to backslash-escape spaces - confirm.
find *.mp4 | sed 's:\ :\ :g'| sed 's/^/file /' > list.txt
# Read list.txt with the concat format and write the result to final.mp4.
ffmpeg -f concat -safe 0 -segment_time_metadata 1 -i list.txt -vf select=concatdec_select -af aselect=concatdec_select,aresample=async=1 final.mp4

13
rough_edit/file_writers.py Executable file
View File

@ -0,0 +1,13 @@
from rough_edit.utils import escape_string
def mpv_command(episode_num, beg, end):
    r"""Return one backslash-continued argument line for an mpv call.

    The line plays ``../original/<episode_num>*`` from ``beg`` to ``end``,
    wrapped in ``--\{`` ... ``--\}``, and ends with a shell line continuation
    so further lines can follow.

    The literal braces are written as ``{{`` / ``}}`` in the f-string, so the
    argument values are inserted verbatim; nothing in them is rewritten.
    """
    return f' --\\{{ ../original/{episode_num}* --start={beg} --end={end} --\\}}\\\n'
def ffmpeg_command(episode_num, beg, end, count, outDir, base_name):
    """Return a shell snippet that cuts one clip out of an episode with ffmpeg.

    The snippet is a blank line, a ``# <base_name>`` comment and the ffmpeg
    call. The clip starts at ``beg``, lasts ``end - beg`` and is written to
    ``<outDir><count, zero-padded to 6 digits><base_name>.mp4``.
    """
    duration = end - beg
    target = f"{outDir}{count:06}{base_name}.mp4"
    script_lines = (
        "",
        f"# {base_name}",
        f'ffmpeg -i ../original/{episode_num}* -ss {beg} -t {duration} -async 1 "{target}" -y',
        "",
    )
    return "\n".join(script_lines)

23
rough_edit/handle_arguments.py Executable file
View File

@ -0,0 +1,23 @@
import sys
from datetime import timedelta
def print_expected_call_message(additional_message):
    """Print ``additional_message`` followed by the usage help of regex-test.py."""
    print(f"""{additional_message}
Expected application call:
python3 regex-test.py [searched phrase] [left_padding] [right_padding]
Example call:
python3 regex-test.py "I don't know" 2 3""")
def handle_arguments():
    """Parse ``sys.argv`` into ``[phrase, padding_left, padding_right]``.

    Expects exactly three arguments: the searched phrase and the left and
    right padding in seconds (fractions are accepted). Both paddings are
    returned as timedeltas.

    On a wrong argument count or a non-numeric padding the usage message is
    printed and the process exits with status 1.
    """
    arg_count = len(sys.argv) - 1
    if arg_count != 3:
        print_expected_call_message(f'Expected three arguments, got {arg_count}.')
        sys.exit(1)
    phrase = sys.argv[1]
    try:
        # timedelta's first positional parameter is *days*; the paddings are
        # seconds, so the unit has to be named explicitly.
        padding_left, padding_right = (
            timedelta(seconds=float(number)) for number in sys.argv[2:4]
        )
    except (ValueError, OverflowError):
        print_expected_call_message('Both paddings must be numbers of seconds.')
        sys.exit(1)
    return [phrase, padding_left, padding_right]

27
rough_edit/normalize_subs.py Executable file
View File

@ -0,0 +1,27 @@
import os
import re
# A line matching any of these patterns is dropped from the output.
# Raw strings keep the regex escapes (\s, \-, \[ ...) out of Python's own
# string-escape processing; the patterns are compiled once at import time.
_SKIPPED_LINE_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'^WEBVTT$',
    r'^Kind: captions$',
    r'^Language:.*$',
    r'^.*align:start position:0%.*$',
    r"^[a-zA-Z'\s\-\[\]]{3,}$",
))


def check_conditions(line):
    """Return True when ``line`` should be kept, False when it is filtered out.

    A line is filtered out if it matches, from its start, any pattern in
    ``_SKIPPED_LINE_PATTERNS``: the WEBVTT / Kind / Language header lines,
    lines containing ``align:start position:0%``, and lines made up only of
    letters, apostrophes, whitespace, hyphens and square brackets (at least
    three characters).
    """
    return not any(pattern.match(line) for pattern in _SKIPPED_LINE_PATTERNS)
# Copy every file from ../../subs to ../../out-subs, keeping only the lines
# that check_conditions() accepts. Each file name is printed as it is processed.
for file_name in os.listdir('../../subs'):
    print(file_name)
    source_path = f'../../subs/{file_name}'
    target_path = f'../../out-subs/{file_name}'
    with open(source_path, 'r') as infile, open(target_path, 'w') as outfile:
        outfile.writelines(line for line in infile if check_conditions(line))

33
rough_edit/stamper.py Executable file
View File

@ -0,0 +1,33 @@
from datetime import timedelta
def string_to_timedelta(time):
    """Convert an ``H:M:S`` string of integer fields into a timedelta.

    Only the first three colon-separated fields are read.
    """
    fields = time.split(':')
    units = ('hours', 'minutes', 'seconds')
    return timedelta(**{unit: int(fields[index]) for index, unit in enumerate(units)})
def split_list(alist, wanted_parts=1):
    """Split ``alist`` into ``wanted_parts`` consecutive slices.

    The slice boundaries are ``i * len(alist) // wanted_parts``, so the part
    sizes differ by at most one and later parts receive the extra items.
    """
    total = len(alist)
    chunks = []
    for part in range(wanted_parts):
        start = part * total // wanted_parts
        stop = (part + 1) * total // wanted_parts
        chunks.append(alist[start:stop])
    return chunks
# Read rip2.sh (dropping the piece after the final newline) and cut its lines
# into 153 consecutive groups.
with open('rip2.sh') as file:
    data = file.read().split('\n')[:-1]
data = split_list(data, 153)
out = []

# Items 1 and 2 of every group are H:M:S strings; turn them into timedeltas.
for d in data:
    print(d)
    for index in (1, 2):
        d[index] = string_to_timedelta(d[index])

# Write one "<running total> <item 0> at <item 1>" line per group to rip3.sh;
# the running total grows by item 2 after each line.
cur = timedelta()
with open('rip3.sh', 'w') as outfile:
    for d in data:
        outfile.write(f'{cur} {d[0]} at {d[1]}\n')
        cur += d[2]

36
rough_edit/utils.py Executable file
View File

@ -0,0 +1,36 @@
import re
from datetime import timedelta
import os
def escape_string(string):
    """Prefix spaces, quotes, parentheses and ``&`` with a backslash.

    Other characters are left alone, so a ``*`` appended by the caller still
    works as a wildcard.
    """
    escapes = str.maketrans({sign: '\\' + sign for sign in ' \'"()&'})
    return string.translate(escapes)
def str_to_timedelta(string):
    """Convert the first ``HH:MM:SS.mmm`` timestamp in ``string`` to a timedelta.

    Raises IndexError when ``string`` contains no such timestamp.
    """
    timestamp_pattern = r"(\d{2}):(\d{2}):(\d{2})\.(\d{3})"
    first_match = re.findall(timestamp_pattern, string)[0]
    hours, minutes, seconds, milliseconds = map(int, first_match)
    return timedelta(hours=hours,
                     minutes=minutes,
                     seconds=seconds,
                     milliseconds=milliseconds)
def generate_regex(prase):
    r"""Build the regex that finds a phrase in word-timed subtitle markup.

    For every word of ``prase`` the pattern expects
    ``<HH:MM:SS.mmm><c> word</c>``, followed by one closing ``<HH:MM:SS.mmm>``
    after the last word. Each timestamp and each word is a capture group, so
    the first group of a match is the start time and the last is the end time.

    Words are separated on any run of whitespace, so repeated spaces do not
    create empty words. Words are inserted as-is and may therefore contain
    regex syntax. The generated pattern is printed before it is returned.
    """
    # "\." - the separator before the milliseconds must be a literal dot.
    time_regex = r"(\d{2}:\d{2}:\d{2}\.\d{3})"
    out_regex = "".join(
        f"<{time_regex}><c> ({word})</c>" for word in prase.split()
    )
    out_regex += f"<{time_regex}>"
    print(out_regex)
    return out_regex
# regex = f"<{timeRegex}><c> ({text})</c><{timeRegex}><c> ({text})</c><{timeRegex}>"