Now that \gls and \acr parsing is (more) functional, there is no need to handle these tags during dict creation

2021-08-27 15:00:28 +02:00 · 2021-08-27 15:00:28 +02:00 · 36d1cf9c93
commit 36d1cf9c93
parent b3aa98132d
2 changed files with 13 additions and 22 deletions
--- a/.talismanrc
+++ b/.talismanrc
@ -1,6 +1,6 @@
 fileignoreconfig:
 - filename: src/flytex2html.py
-  checksum: 7acbf2bba73f07f2ecec477fe5b102fbb96147964b056d5233a79c55788c21a1
+  checksum: 91f55dfaea9cc1358f01bf0aa4927e5df82e585e42f10429df8696a04a589737
 - filename: src/tests/test_environments.py
  ignore_detectors:
  - filename
--- a/src/flytex2html.py
+++ b/src/flytex2html.py
@ -141,6 +141,14 @@ class Converter:
                content = self.replace_acronyms(content)
                content = self.replace_glossary_entries(content)
                acronym[attribute] = content
+        for entry in self.glossary.values():
+            for attribute, content in entry.items():
+                content = self.replace_acronyms(content)
+                content = self.replace_glossary_entries(content)
+                if attribute != "description":
+                    # remove link, should only be contained in description
+                    content = re.sub(r"<a href=\".*?\">(.*?)</a>", r"\1", content)
+                entry[attribute] = content

        self.latex_content = self.replace_acronyms(self.latex_content)
        self.latex_content = self.replace_glossary_entries(self.latex_content)
@ -334,8 +342,6 @@ class Converter:
            for entry, content in re.findall(
                newglossary_pattern, extracted_glossaryentry, flags=re.DOTALL
            ):
-                content = self.replace_acronyms(content)
-                content = self.replace_glossary_entries(content, ignore_missing_keys=True)
                entry_content = self._parse_gls_content(content)
                glossary[entry] = {
                    "name": entry_content["name"],
@ -372,14 +378,12 @@ class Converter:
            )
            if not extracted_dualentry:
                break
-            extracted_dualentry = self.replace_acronyms(extracted_dualentry)
-            extracted_dualentry = self.replace_glossary_entries(extracted_dualentry, ignore_missing_keys=True)
            glossary = {
                entry: {
                    "name": name,
                    "text": name,
                    "description": "\n".join(
-                        self.replace_acronyms(line).strip()
+                        line.strip()
                        for line in description.splitlines()
                        if line.strip()
                    ),
@ -706,9 +710,7 @@ class Converter:

        return latex_string

-    def replace_glossary_entries(
-        self, latex_string: str, ignore_missing_keys: bool = False
-    ) -> str:
+    def replace_glossary_entries(self, latex_string: str) -> str:
        """Replace \\gls, \\Gls, \\glspl, \\Glspl, \\glssymbol and \\glsdesc.

        Return the updated string.
@ -725,12 +727,7 @@ class Converter:
            try:
                found_entry = self.glossary[entry]
            except KeyError:
-                try:
-                    found_entry = self.acronyms[entry]
-                except KeyError as error:
-                    if ignore_missing_keys:
-                        continue
-                    raise KeyError from error
+                found_entry = self.acronyms[entry]

            regex = match.string[match.start() : match.end()]
            if regex.lower().startswith(r"\glsdesc"):
@ -1397,7 +1394,6 @@ class Converter:
        verbose("    - parsing content of glossary entry...")

        pattern = r"(?:\n* *)?(\w+)=\{?(.*?)(?:\}|,)(?:\n* *)?"
-        href_pattern = r"<a href=\".*?\">(.*?)</a>"

        entry_content = {
            "name": "",
@ -1416,12 +1412,7 @@ class Converter:
                remove_extracted=False,
            )
            content = content.replace(tag, "")
-            tag = tag.replace(f"{key}=", "")[:-1]
-            tag = tag.lstrip("{")
-            # for some keys, remove link, should not be contained in key
-            if key != "description":
-                tag = re.sub(href_pattern, r"\1", tag)
-            entry_content[key] = tag
+            entry_content[key] = tag.replace(f"{key}=", "")[:-1].lstrip("{")

        for key, value in re.findall(pattern, content, flags=re.DOTALL):
            if key not in entry_content.keys():