Merge branch 'new-private-person-replacement-logic' into march-tweaks
This commit is contained in:
commit
8fb496bbfe
191
tzk/builders.py
191
tzk/builders.py
@ -19,7 +19,7 @@ import re
|
|||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
from typing import Callable, Dict, List, Optional, Set, Sequence, Tuple
|
from typing import Callable, Dict, Generator, List, Optional, Set, Sequence, Tuple
|
||||||
|
|
||||||
from tzk import git
|
from tzk import git
|
||||||
from tzk import tw
|
from tzk import tw
|
||||||
@ -359,8 +359,159 @@ def _private_people_replacement_table(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _privatize_line(line: str, replacement_table: Dict[str, str],
|
||||||
|
replace_link_text: bool = False) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Given a line and a table of replacements to make, replace all instances
|
||||||
|
of all private people defined in the replacement table.
|
||||||
|
|
||||||
|
Basics:
|
||||||
|
>>> _privatize_line("MsAlice is a test person.", {'MsAlice': 'A.'})
|
||||||
|
'<<privateperson "A.">> is a test person.'
|
||||||
|
|
||||||
|
>>> _privatize_line("This woman, known as MsAlice, is a test person.", \
|
||||||
|
{'MsAlice': 'A.'})
|
||||||
|
'This woman, known as <<privateperson "A.">>, is a test person.'
|
||||||
|
|
||||||
|
>>> _privatize_line("[[MsAlice]] is a test person.", {'MsAlice': 'A.'})
|
||||||
|
'[[A.|PrivatePerson]] is a test person.'
|
||||||
|
|
||||||
|
>>> _privatize_line("When we talk about [[MsAlice]] in the middle of a " \
|
||||||
|
"sentence, that's fine too.", {'MsAlice': 'A.'})
|
||||||
|
"When we talk about [[A.|PrivatePerson]] in the middle of a sentence, that's fine too."
|
||||||
|
|
||||||
|
Links with different text and target:
|
||||||
|
>>> _privatize_line("We can talk about [[Alice|MsAlice]] " \
|
||||||
|
"with different text.", {'MsAlice': 'A.'})
|
||||||
|
'We can talk about [[Alice|PrivatePerson]] with different text.'
|
||||||
|
|
||||||
|
Multiple replacements with different people:
|
||||||
|
>>> _privatize_line("We can have [[MsAlice]] and MrBob talk to each other " \
|
||||||
|
"in the same line.", {'MsAlice': 'A.', 'MrBob': 'B.'})
|
||||||
|
'We can have [[A.|PrivatePerson]] and <<privateperson "B.">> talk to each other in the same line.'
|
||||||
|
|
||||||
|
Multiple replacements with the same person:
|
||||||
|
>>> _privatize_line("We can have MsAlice talk to herself (MsAlice) " \
|
||||||
|
"in the same line.", {'MsAlice': 'A.'})
|
||||||
|
'We can have <<privateperson "A.">> talk to herself (<<privateperson "A.">>) in the same line.'
|
||||||
|
|
||||||
|
>>> _privatize_line("Likewise [[MsAlice]] can do it with brackets " \
|
||||||
|
"([[MsAlice]]).", {'MsAlice': 'A.'})
|
||||||
|
'Likewise [[A.|PrivatePerson]] can do it with brackets ([[A.|PrivatePerson]]).'
|
||||||
|
|
||||||
|
>>> _privatize_line('We can talk about [[Alice|MsAlice]] lots of ways, ' \
|
||||||
|
'like MsAlice and [[MsAlice]].', {'MsAlice': 'A.'})
|
||||||
|
'We can talk about [[Alice|PrivatePerson]] lots of ways, like <<privateperson "A.">> and [[A.|PrivatePerson]].'
|
||||||
|
|
||||||
|
Replacements with alternate link text:
|
||||||
|
>>> _privatize_line('We can talk about [[Alice|MsAlice]] and [[Bob|MrBob]] as well', \
|
||||||
|
{'MsAlice': 'A.', 'MrBob': 'B.'}, replace_link_text=True)
|
||||||
|
'We can talk about [[A.|PrivatePerson]] and [[B.|PrivatePerson]] as well'
|
||||||
|
|
||||||
|
|
||||||
|
We don't want to replace places where a CamelCase match is a substring of another
|
||||||
|
word. This is expected to yield no output because there's nothing to replace:
|
||||||
|
>>> _privatize_line("But an EmbeddedCamelWithMsAliceInIt isn't her.", \
|
||||||
|
{'MsAlice': 'A.'})
|
||||||
|
"""
|
||||||
|
def iteroccurrences(needle: str) -> Generator[int, int, None]:
|
||||||
|
"""
|
||||||
|
Iterate over the start indices of occurrences of substring
|
||||||
|
``needle`` in the line /line/ in outer scope.
|
||||||
|
|
||||||
|
(We have to use outer scope because it can be changed while we're iterating
|
||||||
|
and the generator is only bound to arguments once.)
|
||||||
|
"""
|
||||||
|
idx = -1
|
||||||
|
while True:
|
||||||
|
idx = line.find(needle, idx + 1)
|
||||||
|
if idx == -1:
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
additional_increments = yield idx
|
||||||
|
if additional_increments is not None:
|
||||||
|
idx += additional_increments
|
||||||
|
|
||||||
|
def anchored_at_one_end(start_index: int, end_index: int) -> bool:
|
||||||
|
return start_index == 0 or end_index == len(line)
|
||||||
|
|
||||||
|
def is_camelcase_link(start_index: int, end_index: int) -> bool:
|
||||||
|
return (anchored_at_one_end(start_index, end_index)
|
||||||
|
or (line[start_index-1] != '[' and line[end_index] != ']'))
|
||||||
|
|
||||||
|
def is_bare_bracketed_link(start_index: int, end_index: int) -> bool:
|
||||||
|
return (not anchored_at_one_end(start_index, end_index)
|
||||||
|
and line[start_index-2:start_index] == '[['
|
||||||
|
and line[end_index:end_index+2] == ']]')
|
||||||
|
|
||||||
|
def is_textual_bracketed_link(start_index: int, end_index: int) -> bool:
|
||||||
|
return (not anchored_at_one_end(start_index, end_index)
|
||||||
|
and line[start_index-1] == '|'
|
||||||
|
and line[end_index:end_index+2] == ']]')
|
||||||
|
|
||||||
|
dirty = False
|
||||||
|
increment_iterator_by = 0
|
||||||
|
for replace_person, replace_initials in replacement_table.items():
|
||||||
|
iterator = iteroccurrences(replace_person)
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
# NOTE: the "end" index is one after the last index in the string,
|
||||||
|
# as is needed for slice notation.
|
||||||
|
if increment_iterator_by:
|
||||||
|
start_idx = iterator.send(increment_iterator_by)
|
||||||
|
increment_iterator_by = 0
|
||||||
|
else:
|
||||||
|
start_idx = next(iterator)
|
||||||
|
end_idx = start_idx + len(replace_person)
|
||||||
|
new_line = None
|
||||||
|
|
||||||
|
if is_camelcase_link(start_idx, end_idx):
|
||||||
|
# camel-case link or unlinked reference in text
|
||||||
|
def is_spurious_substring():
|
||||||
|
# If there's not a non-alphanumeric character on both sides of
|
||||||
|
# the "link", we may be making a clbuttic replacement.
|
||||||
|
# <https://en.wikipedia.org/wiki/Scunthorpe_problem>
|
||||||
|
start_ok = start_idx == 0 or not line[start_idx-1].isalnum()
|
||||||
|
end_ok = end_idx == len(line) or not line[end_idx].isalnum()
|
||||||
|
return not (start_ok and end_ok)
|
||||||
|
|
||||||
|
if not is_spurious_substring():
|
||||||
|
new_line = (line[0:start_idx]
|
||||||
|
+ f'<<privateperson "{replace_initials}">>'
|
||||||
|
+ line[end_idx:])
|
||||||
|
elif is_bare_bracketed_link(start_idx, end_idx):
|
||||||
|
# link with the person as the target and text
|
||||||
|
replacement = replace_initials + '|PrivatePerson'
|
||||||
|
new_line = line[0:start_idx] + replacement + line[end_idx:]
|
||||||
|
elif is_textual_bracketed_link(start_idx, end_idx):
|
||||||
|
# link with the person as the target only;
|
||||||
|
# beware that you might have put something private in the text
|
||||||
|
if replace_link_text:
|
||||||
|
start_of_link = line[0:start_idx].rfind('[[', 0, start_idx) + 2
|
||||||
|
new_line = line[0:start_of_link] + f"{replace_initials}|PrivatePerson" + line[end_idx:]
|
||||||
|
else:
|
||||||
|
new_line = line[0:start_idx] + 'PrivatePerson' + line[end_idx:]
|
||||||
|
else:
|
||||||
|
link = line[start_idx:end_idx]
|
||||||
|
raise ValueError("Unknown type of link '{link}'.")
|
||||||
|
|
||||||
|
if new_line:
|
||||||
|
line = new_line
|
||||||
|
dirty = True
|
||||||
|
# If we changed the length of the string by modifying it,
|
||||||
|
# we need to update our stored position within the string.
|
||||||
|
increment_iterator_by = len(new_line) - len(line)
|
||||||
|
except StopIteration:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if dirty:
|
||||||
|
return line
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
@tzk_builder
|
@tzk_builder
|
||||||
def replace_private_people(initialer: Callable[[str], str] = None) -> None:
|
def replace_private_people(initialer: Callable[[str], str] = None, replace_link_text: bool = False) -> None:
|
||||||
"""
|
"""
|
||||||
Replace the names of people who are not marked Public with their initials.
|
Replace the names of people who are not marked Public with their initials.
|
||||||
|
|
||||||
@ -384,6 +535,20 @@ def replace_private_people(initialer: Callable[[str], str] = None) -> None:
|
|||||||
that takes one string argument
|
that takes one string argument
|
||||||
(a tiddler filename without the full path, e.g., ``MsJaneDoe.tid``)
|
(a tiddler filename without the full path, e.g., ``MsJaneDoe.tid``)
|
||||||
and returns a string to be considered the "initials" of that person.
|
and returns a string to be considered the "initials" of that person.
|
||||||
|
|
||||||
|
:param replace_link_text: If you have links in the form
|
||||||
|
``So then [[John said|MrJohnDoe]] something about this``,
|
||||||
|
then enabling this option ensures that the link is fully
|
||||||
|
replaced with
|
||||||
|
``So then [[J.D.|PrivatePerson]] something about this``.
|
||||||
|
This means that when using this feature, having the
|
||||||
|
link text also be meaningful after redaction is important.
|
||||||
|
|
||||||
|
.. warning ::
|
||||||
|
Using this link replacement feature does not redact everything, just the link
|
||||||
|
(and the link text with `replace_link_text` enabled). So *do not* rely on it
|
||||||
|
for redacting everything. Making a tiddler public still needs consideration and
|
||||||
|
tooling is there to help, not to replace your own judgment.
|
||||||
"""
|
"""
|
||||||
assert 'public_wiki_folder' in build_state
|
assert 'public_wiki_folder' in build_state
|
||||||
|
|
||||||
@ -395,25 +560,9 @@ def replace_private_people(initialer: Callable[[str], str] = None) -> None:
|
|||||||
with tiddler.open() as f:
|
with tiddler.open() as f:
|
||||||
lines = f.readlines()
|
lines = f.readlines()
|
||||||
for i in range(len(lines)):
|
for i in range(len(lines)):
|
||||||
for replace_person, replace_initials in replacement_table.items():
|
private_line = _privatize_line(lines[i], replacement_table, replace_link_text)
|
||||||
if replace_person in lines[i]:
|
if private_line is not None:
|
||||||
if '|' + replace_person + ']]' in lines[i]:
|
lines[i] = private_line
|
||||||
# link with the person as the target only;
|
|
||||||
# beware that you might have put something private in the text
|
|
||||||
lines[i] = lines[i].replace(replace_person, 'PrivatePerson')
|
|
||||||
elif '[[' + replace_person + ']]' in lines[i]:
|
|
||||||
# link with the person as the target and text
|
|
||||||
lines[i] = lines[i].replace(
|
|
||||||
replace_person,
|
|
||||||
replace_initials + '|PrivatePerson')
|
|
||||||
else:
|
|
||||||
# camel-case link or unlinked reference in text;
|
|
||||||
# or spurious substring, so rule that out with the '\b' search
|
|
||||||
lines[i] = re.sub(
|
|
||||||
r"\b" + re.escape(replace_person) + r"\b",
|
|
||||||
f'<<privateperson "{replace_initials}">>',
|
|
||||||
lines[i]
|
|
||||||
)
|
|
||||||
dirty = True
|
dirty = True
|
||||||
if dirty:
|
if dirty:
|
||||||
with tiddler.open("w") as f:
|
with tiddler.open("w") as f:
|
||||||
|
Loading…
Reference in New Issue
Block a user