Commit e43685f

Fix get_aflow_label_from_spglib (#75)
* pre-commit autoupdate and run --all-files
* fix UnboundLocalError: local variable 'aflow_label_with_chemsys' referenced before assignment if try case raises
* fix mypy
* ruff unignore and fix RET504 unnecessary-assign
* ruff unignore and fix D107 Missing docstring in __init__
* ruff unignore and fix B904
1 parent a009fe7 commit e43685f

15 files changed: +50 −56 lines

.pre-commit-config.yaml

+2 −2

@@ -5,7 +5,7 @@ ci:
 
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.0.276
+    rev: v0.0.284
     hooks:
       - id: ruff
         args: [--fix]
@@ -28,7 +28,7 @@ repos:
         exclude_types: [json]
 
   - repo: https://github.com/psf/black
-    rev: 23.3.0
+    rev: 23.7.0
     hooks:
       - id: black-jupyter
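
The two rev bumps above are what `pre-commit autoupdate` produces; per the commit message, the updated hooks were then run over the whole repo. Assuming pre-commit is installed, the standard invocations are:

pre-commit autoupdate       # bump every hook's rev to its latest tag
pre-commit run --all-files  # re-run all hooks across the entire repo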

aviary/cgcnn/model.py

+10 −3

@@ -129,6 +129,15 @@ def __init__(
         elem_fea_len: int = 64,
         n_graph: int = 4,
     ) -> None:
+        """Initialize DescriptorNetwork.
+
+        Args:
+            elem_emb_len (int): Number of atom features in the input.
+            nbr_fea_len (int): Number of bond features.
+            elem_fea_len (int, optional): Number of hidden atom features in the graph convolution
+                layers. Defaults to 64.
+            n_graph (int, optional): Number of graph convolution layers. Defaults to 4.
+        """
         super().__init__()
 
         self.embedding = nn.Linear(elem_emb_len, elem_fea_len)
@@ -222,6 +231,4 @@ def forward(
         nbr_summed = scatter_add(nbr_msg, self_idx, dim=0)
 
         nbr_summed = self.bn2(nbr_summed)
-        out = self.softplus2(atom_in_fea + nbr_summed)
-
-        return out
+        return self.softplus2(atom_in_fea + nbr_summed)
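
The second hunk is the RET504 (unnecessary-assign) pattern that recurs throughout this commit: a name bound once and immediately returned is replaced by returning the expression directly. A minimal sketch of the rule itself, not aviary code:

def doubled(x: float) -> float:
    out = x * 2  # ruff RET504: unnecessary assignment before return
    return out

def doubled_fixed(x: float) -> float:
    return x * 2  # same behavior, one fewer name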

aviary/core.py

+2 −2

@@ -411,6 +411,7 @@ class Normalizer:
     """Normalize a Tensor and restore it later."""
 
     def __init__(self) -> None:
+        """Initialize Normalizer with mean 0 and std 1."""
         self.mean = torch.tensor(0)
         self.std = torch.tensor(1)
 
@@ -579,8 +580,7 @@ def masked_std(x: Tensor, mask: BoolTensor, dim: int = 0, eps: float = 1e-12) ->
     mean = masked_mean(x, mask, dim=dim)
     squared_diff = (x - mean.unsqueeze(dim=dim)) ** 2
     var = masked_mean(squared_diff, mask, dim=dim)
-    std = (var + eps).sqrt()
-    return std
+    return (var + eps).sqrt()
 
 
 def masked_mean(x: Tensor, mask: BoolTensor, dim: int = 0) -> Tensor:
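
For context on `masked_std`: its body is fully visible in the hunk, but `masked_mean`'s is not, so the helper in the sketch below is an assumed implementation (averaging only entries where the mask is True), included just to make the example self-contained:

import torch
from torch import BoolTensor, Tensor

def masked_mean(x: Tensor, mask: BoolTensor, dim: int = 0) -> Tensor:
    # assumption: mask is True where entries should count toward the mean
    return (x * mask).sum(dim=dim) / mask.sum(dim=dim)

def masked_std(x: Tensor, mask: BoolTensor, dim: int = 0, eps: float = 1e-12) -> Tensor:
    # identical to the post-fix code above: sqrt of masked variance + eps
    mean = masked_mean(x, mask, dim=dim)
    squared_diff = (x - mean.unsqueeze(dim=dim)) ** 2
    var = masked_mean(squared_diff, mask, dim=dim)
    return (var + eps).sqrt()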

aviary/predict.py

+2 −2

@@ -72,14 +72,14 @@ def make_ensemble_predictions(
     ):
         try:
             checkpoint = torch.load(checkpoint_path, map_location=device)
-        except Exception as exc:  # noqa: PERF203
+        except Exception as exc:
            raise RuntimeError(f"Failed to load {checkpoint_path=}") from exc
 
         model_params = checkpoint.get("model_params")
         if model_params is None:
             raise ValueError(f"model_params not found in {checkpoint_path=}")
 
-        target_name, task_type = list(model_params["task_dict"].items())[0]
+        target_name, task_type = next(iter(model_params["task_dict"].items()))
         assert task_type in ("regression", "classification"), f"invalid {task_type = }"
         if warn_target_mismatch and target_name != target_col:
             print(
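
`next(iter(d.items()))` grabs the first key-value pair without materializing every item in a list first, which is the point of the `list(...)[0]` replacements here and in the data loaders below. A toy illustration (the single-entry task_dict is made up):

task_dict = {"e_form": "regression"}  # hypothetical example

# before: builds a throwaway list of all items
target_name, task_type = list(task_dict.items())[0]

# after: lazily pulls just the first item
target_name, task_type = next(iter(task_dict.items()))
assert (target_name, task_type) == ("e_form", "regression")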

aviary/roost/data.py

+5 −5

@@ -55,7 +55,7 @@ def __init__(
         with open(elem_embedding) as file:
             self.elem_features = json.load(file)
 
-        self.elem_emb_len = len(list(self.elem_features.values())[0])
+        self.elem_emb_len = len(next(iter(self.elem_features.values())))
 
         self.n_targets = []
         for target, task in self.task_dict.items():
@@ -98,14 +98,14 @@ def __getitem__(self, idx: int):
 
         try:
             elem_fea = np.vstack([self.elem_features[element] for element in elements])
-        except AssertionError:
+        except AssertionError as exc:
             raise AssertionError(
                 f"{material_ids} ({composition}) contains element types not in embedding"
-            )
-        except ValueError:
+            ) from exc
+        except ValueError as exc:
             raise ValueError(
                 f"{material_ids} ({composition}) composition cannot be parsed into elements"
-            )
+            ) from exc
 
         nele = len(elements)
         self_idx = []
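
The `as exc` / `from exc` additions are the B904 fix named in the commit message: re-raising inside an `except` block without `from` discards the causal link between the original and the new exception. A minimal sketch with a made-up embedding dict:

elem_features = {"Fe": [0.1, 0.2]}  # hypothetical embedding

try:
    fea = elem_features["Xx"]
except KeyError as exc:
    # chains the KeyError as __cause__, so both tracebacks
    # are printed if this ValueError propagates
    raise ValueError("element 'Xx' not in embedding") from exc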

aviary/segments.py

+2 −6

@@ -40,9 +40,7 @@ def forward(self, x: Tensor, index: Tensor) -> Tensor:
         gate = gate / (scatter_add(gate, index, dim=0)[index] + 1e-10)
 
         x = self.message_nn(x)
-        out = scatter_add(gate * x, index, dim=0)
-
-        return out
+        return scatter_add(gate * x, index, dim=0)
 
     def __repr__(self) -> str:
         gate_nn, message_nn = self.gate_nn, self.message_nn
@@ -82,9 +80,7 @@ def forward(self, x: Tensor, index: Tensor, weights: Tensor) -> Tensor:
         gate = gate / (scatter_add(gate, index, dim=0)[index] + 1e-10)
 
         x = self.message_nn(x)
-        out = scatter_add(gate * x, index, dim=0)
-
-        return out
+        return scatter_add(gate * x, index, dim=0)
 
     def __repr__(self) -> str:
         pow, gate_nn, message_nn = float(self.pow), self.gate_nn, self.message_nn

aviary/utils.py

+1 −3

@@ -805,9 +805,7 @@ def get_metrics(
         class1_probas = predictions[:, 1]
         metrics["ROCAUC"] = roc_auc_score(targets, class1_probas)
 
-    metrics = {key: round(float(val), prec) for key, val in metrics.items()}
-
-    return metrics
+    return {key: round(float(val), prec) for key, val in metrics.items()}
 
 
 def as_dict_handler(obj: Any) -> dict[str, Any] | None:

aviary/wren/data.py

+4 −2

@@ -59,15 +59,17 @@ def __init__(
         with open(elem_embedding) as emb_file:
             self.elem_features = json.load(emb_file)
 
-        self.elem_emb_len = len(list(self.elem_features.values())[0])
+        self.elem_emb_len = len(next(iter(self.elem_features.values())))
 
         if sym_emb in ["bra-alg-off", "spg-alg-off"]:
             sym_emb = f"{PKG_DIR}/embeddings/wyckoff/{sym_emb}.json"
 
         with open(sym_emb) as sym_file:
             self.sym_features = json.load(sym_file)
 
-        self.sym_emb_len = len(list(list(self.sym_features.values())[0].values())[0])
+        self.sym_emb_len = len(
+            next(iter(next(iter(self.sym_features.values())).values()))
+        )
 
         self.n_targets = []
         for target, task in self.task_dict.items():
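
The `sym_emb_len` change applies the same first-item idiom twice over a two-level dict. Assuming the symmetry-embedding JSON maps spacegroup → Wyckoff letter → feature vector (the nesting the old double `list(...)[0]` also implied), a toy version:

# hypothetical nested embedding: {spg: {wyckoff_letter: feature_vector}}
sym_features = {"194": {"a": [0, 1, 0], "b": [1, 0, 0]}}

# first feature vector of the first spacegroup, no intermediate lists
first_vec = next(iter(next(iter(sym_features.values())).values()))
assert len(first_vec) == 3  # this length becomes sym_emb_len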

aviary/wren/model.py

+2 −4

@@ -236,7 +236,7 @@ def forward(
             aug_cry_idx (Tensor): Mapping from the crystal idx to augmentation idx
 
         Returns:
-            Tensor: returns the crystal features of the materials in the batch
+            Tensor: crystal features of the materials in the batch
         """
         # embed the original features into the graph layer description
         elem_fea = self.elem_embed(elem_fea)
@@ -254,12 +254,10 @@ def forward(
             for attnhead in self.cry_pool
         ]
 
-        crystal_features = scatter_mean(
+        return scatter_mean(
             torch.mean(torch.stack(head_fea), dim=0), aug_cry_idx, dim=0
         )
 
-        return crystal_features
-
     def __repr__(self) -> str:
         return (
             f"{type(self).__name__}(n_graph={len(self.graphs)}, cry_heads="

aviary/wren/utils.py

+5 −14

@@ -177,14 +177,12 @@ def get_aflow_label_from_spglib(
         aflow_label_with_chemsys = get_aflow_label_from_spg_analyzer(
             spg_analyzer, errors
         )
+        return aflow_label_with_chemsys
 
     except ValueError as exc:
-        if errors == "raise":
-            raise
         if errors == "annotate":
             return f"invalid spglib: {exc}"
-
-        return aflow_label_with_chemsys
+        raise  # we only get here if errors == "raise"
 
 
 def get_aflow_label_from_spg_analyzer(
@@ -297,9 +295,7 @@ def canonicalize_elem_wyks(elem_wyks: str, spg_num: int) -> str:
         scores.append(score)
         sorted_iso.append(sorted_el_wyks)
 
-    canonical = sorted(zip(scores, sorted_iso), key=lambda x: (x[0], x[1]))[0][1]
-
-    return canonical
+    return sorted(zip(scores, sorted_iso), key=lambda x: (x[0], x[1]))[0][1]
 
 
 def sort_and_score_wyks(wyks: str) -> tuple[str, int]:
@@ -372,8 +368,6 @@ def count_wyckoff_positions(aflow_label: str) -> int:
     Returns:
         int: number of distinct Wyckoff positions
     """
-    num_wyk = 0
-
     aflow_label, _ = aflow_label.split(":")  # remove chemical system
     # discard prototype formula and spg symbol and spg number
     wyk_letters = aflow_label.split("_", maxsplit=3)[-1]
@@ -382,9 +376,7 @@ def count_wyckoff_positions(aflow_label: str) -> int:
     wyk_list = re.split("[A-z]", wyk_letters)[:-1]  # split on every letter
 
     # count 1 for letters without prefix
-    num_wyk = sum(1 if len(x) == 0 else int(x) for x in wyk_list)
-
-    return num_wyk
+    return sum(1 if len(x) == 0 else int(x) for x in wyk_list)
 
 
 def count_crystal_dof(aflow_label: str) -> int:
@@ -488,5 +480,4 @@ def count_distinct_wyckoff_letters(aflow_str: str) -> int:
     aflow_str, _ = aflow_str.split(":")  # drop chemical system
     _, _, _, wyckoff_letters = aflow_str.split("_", 3)  # drop prototype, Pearson, spg
     wyckoff_letters = wyckoff_letters.translate(remove_digits).replace("_", "")
-    n_uniq = len(set(wyckoff_letters))
-    return n_uniq
+    return len(set(wyckoff_letters))  # number of distinct Wyckoff letters
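
The first hunk in this file is the bug the commit is named after. In the old control flow, if the `try` body raised and `errors` was neither "raise" nor "annotate", execution fell through to `return aflow_label_with_chemsys` with that name never having been bound, hence the UnboundLocalError. Returning inside the `try` makes the unbound path unreachable. A miniature of both shapes (`compute` is a stand-in for get_aflow_label_from_spg_analyzer):

def compute() -> str:
    raise ValueError("spglib could not determine symmetry")  # force except path

def old_shape(errors: str) -> str:
    try:
        label = compute()
    except ValueError as exc:
        if errors == "raise":
            raise
        if errors == "annotate":
            return f"invalid spglib: {exc}"
    return label  # UnboundLocalError: 'label' was never assigned

def new_shape(errors: str) -> str:
    try:
        return compute()  # returning inside try leaves no unbound path
    except ValueError as exc:
        if errors == "annotate":
            return f"invalid spglib: {exc}"
        raise  # only reached when errors == "raise"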

aviary/wrenformer/data.py

+3 −6

@@ -94,12 +94,10 @@ def wyckoff_embedding_from_aflow_str(wyckoff_str: str) -> Tensor:
     )
     element_ratios = element_ratios.repeat(n_augments, 1, 1)
 
-    combined_features = torch.cat(
+    return torch.cat(  # combined features
         [element_ratios, element_features, symmetry_features], dim=-1
     ).float()
 
-    return combined_features
-
 
 def get_composition_embedding(formula: str) -> Tensor:
     """Concatenate matscholar element embeddings with element ratios in composition.
@@ -121,9 +119,8 @@ def get_composition_embedding(formula: str) -> Tensor:
     element_ratios = torch.tensor(elem_weights)
     element_features = torch.tensor(element_features)
 
-    combined_features = torch.cat([element_ratios, element_features], dim=1).float()
-
-    return combined_features
+    # combined features
+    return torch.cat([element_ratios, element_features], dim=1).float()
 
 
 def df_to_in_mem_dataloader(

examples/inputs/poscar_to_df.py

+2 −0

@@ -1,4 +1,6 @@
 # %%
+from __future__ import annotations
+
 import glob
 import os
 
examples/wrenformer/mat_bench/make_plots.py

+2 −0

@@ -1,4 +1,6 @@
 # %%
+from __future__ import annotations
+
 import json
 import logging
 import re

examples/wrenformer/mat_bench/plotting_functions.py

+8 −4

@@ -1,16 +1,20 @@
-from typing import Any, Optional
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
-import pandas as pd
 import plotly.express as px
 import plotly.graph_objs as go
 import plotly.io as pio
 from matbench.constants import CLF_KEY, REG_KEY
 from matbench.metadata import mbv01_metadata
 from matbench.metadata import mbv01_metadata as matbench_metadata
-from plotly.graph_objs._figure import Figure
 from sklearn.metrics import accuracy_score, auc, roc_curve
 
+if TYPE_CHECKING:
+    import pandas as pd
+    from plotly.graph_objs._figure import Figure
+
 __author__ = "Janosh Riebesell"
 __date__ = "2022-04-25"
@@ -79,7 +83,7 @@ def scale_clf_task(series: pd.Series) -> pd.Series:
 
 
 def plot_leaderboard(
-    df: pd.DataFrame, html_path: Optional[str] = None, **kwargs: Any
+    df: pd.DataFrame, html_path: str | None = None, **kwargs: Any
 ) -> Figure:
     """Generate the Matbench scaled errors graph seen on
     https://matbench.materialsproject.org. Adapted from https://bit.ly/38fDdgt.
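
The import reshuffle here is the standard PEP 563 pattern: with `from __future__ import annotations`, annotations are stored as strings, so modules used only in type hints can move into an `if TYPE_CHECKING:` block (resolved by type checkers, skipped at runtime), and `Optional[str]` can be written `str | None` even on Pythons older than 3.10. A self-contained sketch with a hypothetical function:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import pandas as pd  # only type checkers resolve this import

def n_rows(df: pd.DataFrame, limit: int | None = None) -> int:
    # the annotations above are never evaluated at runtime,
    # so pandas need not be importable just to call this
    return len(df) if limit is None else min(len(df), limit)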

pyproject.toml

+0 −3

@@ -105,19 +105,16 @@ select = [
     "YTT", # flake8-2020
 ]
 ignore = [
-    "B904", # Within an except clause, raise exceptions with raise ... from err
     "C408", # Unnecessary dict call - rewrite as a literal
     "D100", # Missing docstring in public module
     "D104", # Missing docstring in public package
     "D105", # Missing docstring in magic method
-    "D107", # Missing docstring in __init__
     "D205", # 1 blank line required between summary line and description
     "E731", # Do not assign a lambda expression, use a def
     "PD901", # pandas-df-variable-name
     "PLC1901", # compare-to-empty-string
     "PLR", # pylint refactor
     "PT006", # pytest-parametrize-names-wrong-type
-    "RET504", # unnecessary-assign
 ]
 pydocstyle.convention = "google"
 isort.known-third-party = ["wandb"]
