Now that \gls and \acr parsing is (more) functional, there is no need to handle the tags during dict creation

This commit is contained in:
flyingscorpio@arch-desktop 2021-08-27 15:00:28 +02:00
parent b3aa98132d
commit 36d1cf9c93
2 changed files with 13 additions and 22 deletions

View file

@ -1,6 +1,6 @@
fileignoreconfig:
- filename: src/flytex2html.py
checksum: 7acbf2bba73f07f2ecec477fe5b102fbb96147964b056d5233a79c55788c21a1
checksum: 91f55dfaea9cc1358f01bf0aa4927e5df82e585e42f10429df8696a04a589737
- filename: src/tests/test_environments.py
ignore_detectors:
- filename

View file

@ -141,6 +141,14 @@ class Converter:
content = self.replace_acronyms(content)
content = self.replace_glossary_entries(content)
acronym[attribute] = content
for entry in self.glossary.values():
for attribute, content in entry.items():
content = self.replace_acronyms(content)
content = self.replace_glossary_entries(content)
if attribute != "description":
# remove link, should only be contained in description
content = re.sub(r"<a href=\".*?\">(.*?)</a>", r"\1", content)
entry[attribute] = content
self.latex_content = self.replace_acronyms(self.latex_content)
self.latex_content = self.replace_glossary_entries(self.latex_content)
@ -334,8 +342,6 @@ class Converter:
for entry, content in re.findall(
newglossary_pattern, extracted_glossaryentry, flags=re.DOTALL
):
content = self.replace_acronyms(content)
content = self.replace_glossary_entries(content, ignore_missing_keys=True)
entry_content = self._parse_gls_content(content)
glossary[entry] = {
"name": entry_content["name"],
@ -372,14 +378,12 @@ class Converter:
)
if not extracted_dualentry:
break
extracted_dualentry = self.replace_acronyms(extracted_dualentry)
extracted_dualentry = self.replace_glossary_entries(extracted_dualentry, ignore_missing_keys=True)
glossary = {
entry: {
"name": name,
"text": name,
"description": "\n".join(
self.replace_acronyms(line).strip()
line.strip()
for line in description.splitlines()
if line.strip()
),
@ -706,9 +710,7 @@ class Converter:
return latex_string
def replace_glossary_entries(
self, latex_string: str, ignore_missing_keys: bool = False
) -> str:
def replace_glossary_entries(self, latex_string: str) -> str:
"""Replace \\gls, \\Gls, \\glspl, \\Glspl, \\glssymbol and \\glsdesc.
Return the updated string.
@ -725,12 +727,7 @@ class Converter:
try:
found_entry = self.glossary[entry]
except KeyError:
try:
found_entry = self.acronyms[entry]
except KeyError as error:
if ignore_missing_keys:
continue
raise KeyError from error
found_entry = self.acronyms[entry]
regex = match.string[match.start() : match.end()]
if regex.lower().startswith(r"\glsdesc"):
@ -1397,7 +1394,6 @@ class Converter:
verbose(" - parsing content of glossary entry...")
pattern = r"(?:\n* *)?(\w+)=\{?(.*?)(?:\}|,)(?:\n* *)?"
href_pattern = r"<a href=\".*?\">(.*?)</a>"
entry_content = {
"name": "",
@ -1416,12 +1412,7 @@ class Converter:
remove_extracted=False,
)
content = content.replace(tag, "")
tag = tag.replace(f"{key}=", "")[:-1]
tag = tag.lstrip("{")
# for some keys, remove link, should not be contained in key
if key != "description":
tag = re.sub(href_pattern, r"\1", tag)
entry_content[key] = tag
entry_content[key] = tag.replace(f"{key}=", "")[:-1].lstrip("{")
for key, value in re.findall(pattern, content, flags=re.DOTALL):
if key not in entry_content.keys():