Now that \gls and \acr parsing is (more) functional, there is no need to handle the tags during dict creation
This commit is contained in:
parent
b3aa98132d
commit
36d1cf9c93
2 changed files with 13 additions and 22 deletions
|
@ -1,6 +1,6 @@
|
|||
fileignoreconfig:
|
||||
- filename: src/flytex2html.py
|
||||
checksum: 7acbf2bba73f07f2ecec477fe5b102fbb96147964b056d5233a79c55788c21a1
|
||||
checksum: 91f55dfaea9cc1358f01bf0aa4927e5df82e585e42f10429df8696a04a589737
|
||||
- filename: src/tests/test_environments.py
|
||||
ignore_detectors:
|
||||
- filename
|
||||
|
|
|
@ -141,6 +141,14 @@ class Converter:
|
|||
content = self.replace_acronyms(content)
|
||||
content = self.replace_glossary_entries(content)
|
||||
acronym[attribute] = content
|
||||
for entry in self.glossary.values():
|
||||
for attribute, content in entry.items():
|
||||
content = self.replace_acronyms(content)
|
||||
content = self.replace_glossary_entries(content)
|
||||
if attribute != "description":
|
||||
# remove link, should only be contained in description
|
||||
content = re.sub(r"<a href=\".*?\">(.*?)</a>", r"\1", content)
|
||||
entry[attribute] = content
|
||||
|
||||
self.latex_content = self.replace_acronyms(self.latex_content)
|
||||
self.latex_content = self.replace_glossary_entries(self.latex_content)
|
||||
|
@ -334,8 +342,6 @@ class Converter:
|
|||
for entry, content in re.findall(
|
||||
newglossary_pattern, extracted_glossaryentry, flags=re.DOTALL
|
||||
):
|
||||
content = self.replace_acronyms(content)
|
||||
content = self.replace_glossary_entries(content, ignore_missing_keys=True)
|
||||
entry_content = self._parse_gls_content(content)
|
||||
glossary[entry] = {
|
||||
"name": entry_content["name"],
|
||||
|
@ -372,14 +378,12 @@ class Converter:
|
|||
)
|
||||
if not extracted_dualentry:
|
||||
break
|
||||
extracted_dualentry = self.replace_acronyms(extracted_dualentry)
|
||||
extracted_dualentry = self.replace_glossary_entries(extracted_dualentry, ignore_missing_keys=True)
|
||||
glossary = {
|
||||
entry: {
|
||||
"name": name,
|
||||
"text": name,
|
||||
"description": "\n".join(
|
||||
self.replace_acronyms(line).strip()
|
||||
line.strip()
|
||||
for line in description.splitlines()
|
||||
if line.strip()
|
||||
),
|
||||
|
@ -706,9 +710,7 @@ class Converter:
|
|||
|
||||
return latex_string
|
||||
|
||||
def replace_glossary_entries(
|
||||
self, latex_string: str, ignore_missing_keys: bool = False
|
||||
) -> str:
|
||||
def replace_glossary_entries(self, latex_string: str) -> str:
|
||||
"""Replace \\gls, \\Gls, \\glspl, \\Glspl, \\glssymbol and \\glsdesc.
|
||||
|
||||
Return the updated string.
|
||||
|
@ -725,12 +727,7 @@ class Converter:
|
|||
try:
|
||||
found_entry = self.glossary[entry]
|
||||
except KeyError:
|
||||
try:
|
||||
found_entry = self.acronyms[entry]
|
||||
except KeyError as error:
|
||||
if ignore_missing_keys:
|
||||
continue
|
||||
raise KeyError from error
|
||||
found_entry = self.acronyms[entry]
|
||||
|
||||
regex = match.string[match.start() : match.end()]
|
||||
if regex.lower().startswith(r"\glsdesc"):
|
||||
|
@ -1397,7 +1394,6 @@ class Converter:
|
|||
verbose(" - parsing content of glossary entry...")
|
||||
|
||||
pattern = r"(?:\n* *)?(\w+)=\{?(.*?)(?:\}|,)(?:\n* *)?"
|
||||
href_pattern = r"<a href=\".*?\">(.*?)</a>"
|
||||
|
||||
entry_content = {
|
||||
"name": "",
|
||||
|
@ -1416,12 +1412,7 @@ class Converter:
|
|||
remove_extracted=False,
|
||||
)
|
||||
content = content.replace(tag, "")
|
||||
tag = tag.replace(f"{key}=", "")[:-1]
|
||||
tag = tag.lstrip("{")
|
||||
# for some keys, remove link, should not be contained in key
|
||||
if key != "description":
|
||||
tag = re.sub(href_pattern, r"\1", tag)
|
||||
entry_content[key] = tag
|
||||
entry_content[key] = tag.replace(f"{key}=", "")[:-1].lstrip("{")
|
||||
|
||||
for key, value in re.findall(pattern, content, flags=re.DOTALL):
|
||||
if key not in entry_content.keys():
|
||||
|
|
Loading…
Reference in a new issue