Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Technik
protocol-translation-live
Commits
091bce2e
Commit
091bce2e
authored
Apr 24, 2022
by
Jakob Lerch
Browse files
add main algorithm
parent
d86fe44d
Changes
2
Hide whitespace changes
Inline
Side-by-side
config.conf
View file @
091bce2e
[
main
]
active_set_size
=
2
header_size
=
10
header_size
=
10
[
translation
]
service
=
libretranslate
...
...
protocol_translation_live/__main__.py
View file @
091bce2e
import
json
import
time
from
configparser
import
ConfigParser
from
path
lib
import
Path
from
difflib
import
Differ
from
diff
lib
import
SequenceMatcher
from
itertools
import
takewhile
from
math
import
ceil
from
pathlib
import
Path
import
atexit
import
requests
...
...
@@ -14,9 +16,10 @@ from translation import DeepL, LibreTranslate, Translation
# TODO: this only seems to work with the FeM instance; it fails when used against libretranslate.de
# TODO: implement possibility to execute this package for multiple pairs of protocols (see matterbridge)
# TODO: work on appending text:
# idea: 2 states:
# * detection of active line (using diff)
# * translation of active line
# write termination message on exit
def write_termination_message():
    """Prepend a termination notice to the destination pad, if one exists.

    Registered with ``atexit``. ``dst_pad`` is only bound further down in the
    script, so it is looked up at call time; when the script exits before the
    pad has been created there is nothing to write to and the handler returns
    quietly instead of raising ``NameError`` during interpreter shutdown.
    """
    pad = globals().get("dst_pad")
    if pad is None:
        return
    pad.write("[PROTOCOL TRANSLATION HAS BEEN TERMINATED]\n" + pad.read())


atexit.register(write_termination_message)
if
__name__
==
"__main__"
:
# read config files
...
...
@@ -28,7 +31,6 @@ if __name__ == "__main__":
config
.
read
(
Path
(
__file__
).
parent
.
parent
/
"config.conf"
)
# read main
active_set_size
=
int
(
config
[
"main"
][
"active_set_size"
])
header_size
=
int
(
config
[
"main"
][
"header_size"
])
# create src pad
...
...
@@ -71,23 +73,88 @@ if __name__ == "__main__":
raise
ValueError
(
"config.conf: 'service' in [translation] is not valid"
)
# do translation
# ..initial translation
dst_pad.write("initializing...")

# from here on the pad text is handled as a list of lines
initial_text = src_pad.read().split('\n')
header = initial_text[:header_size]
body = initial_text[header_size:]

# ..only include line break after header if header size > 0
joined_header = '\n'.join(header) + '\n' if header else ""

# ..strip leading lines consisting of whitespace
# ..add them when writing to dst_pad
# ..this has to be done, because libretranslate ignores leading whitespace,
# ..leading to incorrect order of the translated lines
leading_space_lines = list(takewhile(lambda line: line.isspace() or not line, body))
# ..only include line break after lines if size > 0
joined_leading_space_lines = '\n'.join(leading_space_lines) + '\n' if leading_space_lines else ""
body = body[len(leading_space_lines):]

# the header and the leading blank lines are passed through untranslated
joined_translated_body = t.translate('\n'.join(body), src_lang, dst_lang)
dst_pad.write(joined_header + joined_leading_space_lines + joined_translated_body)

# ..further translation
# s is reused for every diff between two consecutive reads of the source pad
s = SequenceMatcher()
# translated_body is kept as a list of lines so that changed ranges can be patched in place
translated_body = joined_translated_body.split('\n')
while True:
    # TODO: it is assumed that libretranslate translates the protocol line by line,
    #       i.e. that translated_body[i] is the translation of body[i]; this is to be tested
    # body, old_body and translated_body are lists of lines
    old_body = body

    # read body (without header); the pad is read exactly once per iteration
    body = src_pad.read().split('\n')[header_size:]

    # again, handle leading space lines: they are not sent to the translation
    # service but re-added verbatim when writing to dst_pad
    leading_space_lines = list(takewhile(lambda line: line.isspace() or not line, body))
    joined_leading_space_lines = '\n'.join(leading_space_lines) + '\n' if leading_space_lines else ""
    body = body[len(leading_space_lines):]

    # diff the previous body against the current one
    s.set_seqs(old_body, body)

    # patch translated_body in place, translating only the changed line ranges
    # opcode indices i1:i2 refer to old_body (and thus to the unpatched
    # translated_body), j1:j2 refer to body; 'offset' is the net number of
    # lines added to translated_body by the opcodes applied so far
    offset = 0
    for tag, i1, i2, j1, j2 in s.get_opcodes():
        if tag == "equal":
            continue
        if tag == "delete":
            # nothing to translate, the old lines are just removed
            new_lines = []
        else:
            # 'replace' or 'insert': translate the new source lines
            new_lines = t.translate('\n'.join(body[j1:j2]), src_lang, dst_lang).split('\n')
        translated_body[i1 + offset:i2 + offset] = new_lines
        # every tag (including 'replace') may change the length of the list;
        # use the actual length change so that later opcodes hit the right slice
        # NOTE: if the service returns a different number of lines than it was
        #       given, translated_body and body are no longer aligned line by line
        offset += len(new_lines) - (i2 - i1)

    # write
    dst_pad.write(joined_header + joined_leading_space_lines + '\n'.join(translated_body))
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment