From 54254fc030b4e53f9a3593165ccac3964540b545 Mon Sep 17 00:00:00 2001 From: hexagonrose Date: Fri, 14 Mar 2025 02:32:06 +0900 Subject: [PATCH 01/13] feat: we can automatically download 7net-mf-ompa --- sevenn/_const.py | 22 ++++++++-------------- sevenn/util.py | 26 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/sevenn/_const.py b/sevenn/_const.py index 05414e42..58161eae 100644 --- a/sevenn/_const.py +++ b/sevenn/_const.py @@ -48,19 +48,11 @@ ACTIVATION_DICT = {'e': ACTIVATION_FOR_EVEN, 'o': ACTIVATION_FOR_ODD} _prefix = os.path.abspath(f'{os.path.dirname(__file__)}/pretrained_potentials') -SEVENNET_0_11Jul2024 = ( - f'{_prefix}/SevenNet_0__11Jul2024/checkpoint_sevennet_0.pth' -) -SEVENNET_0_22May2024 = ( - f'{_prefix}/SevenNet_0__22May2024/checkpoint_sevennet_0.pth' -) -SEVENNET_l3i5 = ( - f'{_prefix}/SevenNet_l3i5/checkpoint_l3i5.pth' -) -SEVENNET_MF_0 = ( - f'{_prefix}/SevenNet_MF_0/checkpoint_sevennet_mf_0.pth' -) - +SEVENNET_0_11Jul2024 = f'{_prefix}/SevenNet_0__11Jul2024/checkpoint_sevennet_0.pth' +SEVENNET_0_22May2024 = f'{_prefix}/SevenNet_0__22May2024/checkpoint_sevennet_0.pth' +SEVENNET_l3i5 = f'{_prefix}/SevenNet_l3i5/checkpoint_l3i5.pth' +SEVENNET_MF_0 = f'{_prefix}/SevenNet_MF_0/checkpoint_sevennet_mf_0.pth' +SEVENNET_MF_OMPA = f'{_prefix}/SevenNet_MF_OMPA/checkpoint_sevennet_mf_ompa.pth' # to avoid torch script to compile torch_geometry.data AtomGraphDataType = Dict[str, torch.Tensor] @@ -143,7 +135,9 @@ def error_record_condition(x): }, KEY.CUTOFF: float, KEY.NUM_CONVOLUTION: int, - KEY.CONV_DENOMINATOR: lambda x: isinstance(x, float) or x in [ + KEY.CONV_DENOMINATOR: lambda x: isinstance(x, float) + or x + in [ 'avg_num_neigh', 'sqrt_avg_num_neigh', ], diff --git a/sevenn/util.py b/sevenn/util.py index c7325504..fdd0f696 100644 --- a/sevenn/util.py +++ b/sevenn/util.py @@ -203,6 +203,8 @@ def pretrained_name_to_path(name: str) -> str: checkpoint_path = _const.SEVENNET_l3i5 elif name in [f'{n}-mf-0' for n in heads]: checkpoint_path = _const.SEVENNET_MF_0 + elif name in [f'{n}-mf-ompa' for n in heads]: + checkpoint_path = _const.SEVENNET_MF_OMPA else: raise ValueError('Not a valid potential') @@ -215,6 +217,8 @@ def load_checkpoint(checkpoint: Union[pathlib.Path, str]): else: try: checkpoint_path = pretrained_name_to_path(str(checkpoint)) + if not os.path.isfile(checkpoint_path): + download_checkpoint(str(checkpoint)) except ValueError: raise ValueError( f'Given {checkpoint} is not exists and not a pre-trained name' @@ -222,6 +226,28 @@ def load_checkpoint(checkpoint: Union[pathlib.Path, str]): return SevenNetCheckpoint(checkpoint_path) +def download_checkpoint(checkpoint_name: str): + import subprocess + + name = checkpoint_name.lower() + heads = ['sevennet', '7net'] + if name in [f'{n}-mf-ompa' for n in heads]: + download_url = 'https://figshare.com/ndownloader/files/52975859' + pretrained_pot_path = os.path.abspath( + f'{os.path.dirname(__file__)}/pretrained_potentials' + ) + save_path = os.path.join(pretrained_pot_path, 'SevenNet_MF_OMPA') + os.makedirs(save_path, exist_ok=True) + subprocess.run( + [ + 'wget', + '-O', + os.path.join(save_path, 'checkpoint_sevennet_mf_ompa.pth'), + download_url, + ] + ) + + def unique_filepath(filepath: str) -> str: if not os.path.isfile(filepath): return filepath From ef0aa8b277fc47b996aa915285454804443d0784 Mon Sep 17 00:00:00 2001 From: hexagonrose Date: Fri, 14 Mar 2025 17:41:43 +0900 Subject: [PATCH 02/13] feat: apply feedback first --- sevenn/_const.py | 5 +++ sevenn/util.py | 87 
++++++++++++++++++++++++++++++++---------------- 2 files changed, 64 insertions(+), 28 deletions(-) diff --git a/sevenn/_const.py b/sevenn/_const.py index 58161eae..cf2b1015 100644 --- a/sevenn/_const.py +++ b/sevenn/_const.py @@ -53,7 +53,12 @@ SEVENNET_l3i5 = f'{_prefix}/SevenNet_l3i5/checkpoint_l3i5.pth' SEVENNET_MF_0 = f'{_prefix}/SevenNet_MF_0/checkpoint_sevennet_mf_0.pth' SEVENNET_MF_OMPA = f'{_prefix}/SevenNet_MF_OMPA/checkpoint_sevennet_mf_ompa.pth' +SEVENNET_OMAT = f'{_prefix}/SevenNet_OMAT/checkpoint_sevennet_omat.pth' +SEVENNET_DOWNLOAD_LINK = { + SEVENNET_MF_OMPA: 'https://figshare.com/ndownloader/files/52975859', + SEVENNET_OMAT: 'https://figshare.com/ndownloader/files/52984643', +} # to avoid torch script to compile torch_geometry.data AtomGraphDataType = Dict[str, torch.Tensor] diff --git a/sevenn/util.py b/sevenn/util.py index fdd0f696..34cbd734 100644 --- a/sevenn/util.py +++ b/sevenn/util.py @@ -1,5 +1,9 @@ import os import pathlib +import shutil +import tempfile +import urllib.error +import urllib.request from typing import Dict, List, Tuple, Union import numpy as np @@ -7,6 +11,7 @@ import torch.nn from e3nn.o3 import FullTensorProduct, Irreps +import sevenn._const as CONST import sevenn._keys as KEY from sevenn.checkpoint import SevenNetCheckpoint @@ -186,8 +191,6 @@ def infer_irreps_out( def pretrained_name_to_path(name: str) -> str: - import sevenn._const as _const - name = name.lower() heads = ['sevennet', '7net'] checkpoint_path = None @@ -196,18 +199,22 @@ def pretrained_name_to_path(name: str) -> str: or name in [f'{n}-0_11jul2024' for n in heads] or name in ['sevennet-0', '7net-0'] ): - checkpoint_path = _const.SEVENNET_0_11Jul2024 + checkpoint_path = CONST.SEVENNET_0_11Jul2024 elif name in [f'{n}-0_22may2024' for n in heads]: - checkpoint_path = _const.SEVENNET_0_22May2024 + checkpoint_path = CONST.SEVENNET_0_22May2024 elif name in [f'{n}-l3i5' for n in heads]: - checkpoint_path = _const.SEVENNET_l3i5 + checkpoint_path = CONST.SEVENNET_l3i5 elif name in [f'{n}-mf-0' for n in heads]: - checkpoint_path = _const.SEVENNET_MF_0 + checkpoint_path = CONST.SEVENNET_MF_0 elif name in [f'{n}-mf-ompa' for n in heads]: - checkpoint_path = _const.SEVENNET_MF_OMPA + checkpoint_path = CONST.SEVENNET_MF_OMPA + elif name in [f'{n}-omat' for n in heads]: + checkpoint_path = CONST.SEVENNET_OMAT else: raise ValueError('Not a valid potential') + checkpoint_path = check_and_download_checkpoint(checkpoint_path) + return checkpoint_path @@ -217,8 +224,6 @@ def load_checkpoint(checkpoint: Union[pathlib.Path, str]): else: try: checkpoint_path = pretrained_name_to_path(str(checkpoint)) - if not os.path.isfile(checkpoint_path): - download_checkpoint(str(checkpoint)) except ValueError: raise ValueError( f'Given {checkpoint} is not exists and not a pre-trained name' @@ -226,26 +231,52 @@ def load_checkpoint(checkpoint: Union[pathlib.Path, str]): return SevenNetCheckpoint(checkpoint_path) -def download_checkpoint(checkpoint_name: str): - import subprocess - - name = checkpoint_name.lower() - heads = ['sevennet', '7net'] - if name in [f'{n}-mf-ompa' for n in heads]: - download_url = 'https://figshare.com/ndownloader/files/52975859' - pretrained_pot_path = os.path.abspath( - f'{os.path.dirname(__file__)}/pretrained_potentials' - ) - save_path = os.path.join(pretrained_pot_path, 'SevenNet_MF_OMPA') +def check_and_download_checkpoint(checkpoint_path: str): + # check if the file exists + if os.path.isfile(checkpoint_path): + return checkpoint_path + model_name = 
os.path.basename(os.path.dirname(checkpoint_path)) + home_save_path = os.path.expanduser(f'~/.cache/{model_name}') + checkpoint_path2 = os.path.join( + home_save_path, os.path.basename(checkpoint_path) + ) + if os.path.isfile(checkpoint_path2): + return checkpoint_path2 + + # download the file + download_url = CONST.SEVENNET_DOWNLOAD_LINK.get(checkpoint_path) + print(f'Downloading {model_name} checkpoint', flush=True) + try: + save_path = os.path.dirname(checkpoint_path) os.makedirs(save_path, exist_ok=True) - subprocess.run( - [ - 'wget', - '-O', - os.path.join(save_path, 'checkpoint_sevennet_mf_ompa.pth'), - download_url, - ] - ) + except Exception: + try: + save_path = home_save_path + os.makedirs(save_path, exist_ok=True) + checkpoint_path = checkpoint_path2 + except ValueError: + raise ValueError( + f'Failed to create save path for {model_name} checkpoint' + ) + print(f'Saving to {save_path}', flush=True) + with tempfile.NamedTemporaryFile(delete=False, dir=save_path) as temp_file: + temp_path = temp_file.name + try: + _, http_msg = urllib.request.urlretrieve(download_url, temp_path) + print(f'Download complete to {save_path}', flush=True) + shutil.move(temp_path, checkpoint_path) + except ( + urllib.error.URLError, + urllib.error.HTTPError, + OSError, + shutil.Error, + KeyboardInterrupt, + ) as e: + raise ValueError(f'Failed to download {model_name} checkpoint: {e}') + finally: + if os.path.isfile(temp_path): + os.remove(temp_path) + return checkpoint_path def unique_filepath(filepath: str) -> str: From e8557f01d34ce3948b3cdb05ec0eb883ce99fffa Mon Sep 17 00:00:00 2001 From: hexagonrose Date: Sun, 16 Mar 2025 01:47:40 +0900 Subject: [PATCH 03/13] feat: apply feedback 2, lighter checkpoint --- sevenn/_const.py | 4 ++-- sevenn/util.py | 12 ++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/sevenn/_const.py b/sevenn/_const.py index cf2b1015..7089b9df 100644 --- a/sevenn/_const.py +++ b/sevenn/_const.py @@ -56,8 +56,8 @@ SEVENNET_OMAT = f'{_prefix}/SevenNet_OMAT/checkpoint_sevennet_omat.pth' SEVENNET_DOWNLOAD_LINK = { - SEVENNET_MF_OMPA: 'https://figshare.com/ndownloader/files/52975859', - SEVENNET_OMAT: 'https://figshare.com/ndownloader/files/52984643', + SEVENNET_MF_OMPA: 'https://figshare.com/ndownloader/files/53015138', + SEVENNET_OMAT: 'https://figshare.com/ndownloader/files/53015129', } # to avoid torch script to compile torch_geometry.data AtomGraphDataType = Dict[str, torch.Tensor] diff --git a/sevenn/util.py b/sevenn/util.py index 34cbd734..d50063ae 100644 --- a/sevenn/util.py +++ b/sevenn/util.py @@ -250,14 +250,10 @@ def check_and_download_checkpoint(checkpoint_path: str): save_path = os.path.dirname(checkpoint_path) os.makedirs(save_path, exist_ok=True) except Exception: - try: - save_path = home_save_path - os.makedirs(save_path, exist_ok=True) - checkpoint_path = checkpoint_path2 - except ValueError: - raise ValueError( - f'Failed to create save path for {model_name} checkpoint' - ) + save_path = home_save_path + os.makedirs(save_path, exist_ok=True) + checkpoint_path = checkpoint_path2 + print(f'Saving to {save_path}', flush=True) with tempfile.NamedTemporaryFile(delete=False, dir=save_path) as temp_file: temp_path = temp_file.name From 4b27d7eee9e5e0043c0cf1dee0c7f9396cbaa503 Mon Sep 17 00:00:00 2001 From: hexagonrose Date: Sun, 16 Mar 2025 02:27:18 +0900 Subject: [PATCH 04/13] feat: add pytest of new models --- tests/unit_tests/test_pretrained.py | 140 +++++++++++++++++++++++++++- 1 file changed, 137 insertions(+), 3 deletions(-) diff --git 
a/tests/unit_tests/test_pretrained.py b/tests/unit_tests/test_pretrained.py index 58e33d0c..dd198613 100644 --- a/tests/unit_tests/test_pretrained.py +++ b/tests/unit_tests/test_pretrained.py @@ -196,9 +196,9 @@ def test_7net_mf_0(atoms_pbc, atoms_mol): g2_ref_e = torch.tensor([-14.172412872314453]) g2_ref_f = torch.tensor( [ - [4.6566129e-10, -1.3429364e+01, 6.9344816e+00], - [2.3283064e-09, 8.9132404e+00, -9.6807365e+00], - [-2.7939677e-09, 4.5161238e+00, 2.7462559e+00], + [4.6566129e-10, -1.3429364e01, 6.9344816e00], + [2.3283064e-09, 8.9132404e00, -9.6807365e00], + [-2.7939677e-09, 4.5161238e00, 2.7462559e00], ] ) @@ -208,3 +208,137 @@ def test_7net_mf_0(atoms_pbc, atoms_mol): assert acl(g2.inferred_total_energy, g2_ref_e) assert acl(g2.inferred_force, g2_ref_f) + + +def test_7net_mf_ompa_mpa(atoms_pbc, atoms_mol): + cp_path = pretrained_name_to_path('7net-mf-ompa') + model, config = model_from_checkpoint(cp_path) + cutoff = config['cutoff'] + + g1 = AtomGraphData.from_numpy_dict(unlabeled_atoms_to_graph(atoms_pbc, cutoff)) + g2 = AtomGraphData.from_numpy_dict(unlabeled_atoms_to_graph(atoms_mol, cutoff)) + + # mpa + g1[KEY.DATA_MODALITY] = 'mpa' + g2[KEY.DATA_MODALITY] = 'mpa' + + model.set_is_batch_data(False) + g1 = model(g1) + g2 = model(g2) + + model.set_is_batch_data(True) + + g1_ref_e = torch.tensor([-3.49159574508667]) + g1_ref_f = torch.tensor( + [ + [1.2676645e01, -9.1320835e-04, -9.1319904e-04], + [-1.2676647e01, 9.1319904e-04, 9.1320090e-04], + ] + ) + g1_ref_s = -1 * torch.tensor( + # xx, yy, zz, xy, yz, zx + [-0.6478229, -0.02454099, -0.02454098, 0.02695589, 0.00459683, 0.02695588] + ) + + g2_ref_e = torch.tensor([-12.59517765045166]) + g2_ref_f = torch.tensor( + [ + [0.0, -12.236995, 7.270792], + [0.0, 8.810955, -9.422472], + [0.0, 3.4260397, 2.15168], + ] + ) + assert acl(g1.inferred_total_energy, g1_ref_e) + assert acl(g1.inferred_force, g1_ref_f) + assert acl(g1.inferred_stress, g1_ref_s) + + assert acl(g2.inferred_total_energy, g2_ref_e) + assert acl(g2.inferred_force, g2_ref_f) + + +def test_7net_mf_ompa_omat(atoms_pbc, atoms_mol): + cp_path = pretrained_name_to_path('7net-mf-ompa') + model, config = model_from_checkpoint(cp_path) + cutoff = config['cutoff'] + + g1 = AtomGraphData.from_numpy_dict(unlabeled_atoms_to_graph(atoms_pbc, cutoff)) + g2 = AtomGraphData.from_numpy_dict(unlabeled_atoms_to_graph(atoms_mol, cutoff)) + + # mpa + g1[KEY.DATA_MODALITY] = 'omat24' + g2[KEY.DATA_MODALITY] = 'omat24' + + model.set_is_batch_data(False) + g1 = model(g1) + g2 = model(g2) + + model.set_is_batch_data(True) + + g1_ref_e = torch.tensor([-3.5113704204559326]) + g1_ref_f = torch.tensor( + [ + [1.2555956e01, -2.1019224e-03, -2.1019187e-03], + [-1.2555954e01, 2.1019280e-03, 2.1019522e-03], + ] + ) + g1_ref_s = -1 * torch.tensor( + # xx, yy, zz, xy, yz, zx + [-0.6425987, -0.02529771, -0.0252977, 0.02685493, 0.00460126, 0.02685493] + ) + + g2_ref_e = torch.tensor([-12.618719100952148]) + g2_ref_f = torch.tensor( + [ + [0.0, -12.197467, 7.2081957], + [0.0, 8.784865, -9.367334], + [0.0, 3.412602, 2.1591387], + ] + ) + assert acl(g1.inferred_total_energy, g1_ref_e) + assert acl(g1.inferred_force, g1_ref_f) + assert acl(g1.inferred_stress, g1_ref_s) + + assert acl(g2.inferred_total_energy, g2_ref_e) + assert acl(g2.inferred_force, g2_ref_f) + + +def test_7net_omat(atoms_pbc, atoms_mol): + cp_path = pretrained_name_to_path('7net-omat') + model, config = model_from_checkpoint(cp_path) + cutoff = config['cutoff'] + + g1 = AtomGraphData.from_numpy_dict(unlabeled_atoms_to_graph(atoms_pbc, 
cutoff)) + g2 = AtomGraphData.from_numpy_dict(unlabeled_atoms_to_graph(atoms_mol, cutoff)) + + model.set_is_batch_data(False) + g1 = model(g1) + g2 = model(g2) + + model.set_is_batch_data(True) + + g1_ref_e = torch.tensor([-3.5033323764801025]) + g1_ref_f = torch.tensor( + [ + [12.533154, 0.02358698, 0.02358694], + [-12.533153, -0.02358699, -0.02358697], + ] + ) + g1_ref_s = -1 * torch.tensor( + # xx, yy, zz, xy, yz, zx + [-0.6420925, -0.02781446, -0.02781446, 0.02575445, 0.00381664, 0.02575445] + ) + + g2_ref_e = torch.tensor([-12.403768539428711]) + g2_ref_f = torch.tensor( + [ + [0, -12.848297, 7.11432], + [0.0, 9.265477, -9.564951], + [0.0, 3.58282, 2.4506311], + ] + ) + assert acl(g1.inferred_total_energy, g1_ref_e) + assert acl(g1.inferred_force, g1_ref_f) + assert acl(g1.inferred_stress, g1_ref_s) + + assert acl(g2.inferred_total_energy, g2_ref_e) + assert acl(g2.inferred_force, g2_ref_f) From 1a1c9ec8e70711404195d3d9ba8124f2b362f9b0 Mon Sep 17 00:00:00 2001 From: YutackPark Date: Sun, 16 Mar 2025 15:15:04 +0900 Subject: [PATCH 05/13] refactor: change url to git --- pyproject.toml | 1 + setup.cfg | 2 +- sevenn/_const.py | 11 ++-- sevenn/util.py | 132 ++++++++++++++++++++++++++--------------------- 4 files changed, 81 insertions(+), 65 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 543ff051..093a0c99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "numpy", "matscipy", "pandas", + "requests", ] [project.optional-dependencies] test = ["matscipy", "pytest-cov>=5"] diff --git a/setup.cfg b/setup.cfg index 84ac35f1..1505c8b8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,5 +10,5 @@ include_trailing_comma=True force_grid_wrap=0 use_parentheses=True line_length=80 -known_third_party=ase,braceexpand,e3nn,numpy,packaging,pandas,pytest,sklearn,torch,torch_geometric,tqdm,yaml +known_third_party=ase,braceexpand,e3nn,numpy,packaging,pandas,pytest,requests,sklearn,torch,torch_geometric,tqdm,yaml known_first_party= diff --git a/sevenn/_const.py b/sevenn/_const.py index 7089b9df..2d753d04 100644 --- a/sevenn/_const.py +++ b/sevenn/_const.py @@ -52,12 +52,13 @@ SEVENNET_0_22May2024 = f'{_prefix}/SevenNet_0__22May2024/checkpoint_sevennet_0.pth' SEVENNET_l3i5 = f'{_prefix}/SevenNet_l3i5/checkpoint_l3i5.pth' SEVENNET_MF_0 = f'{_prefix}/SevenNet_MF_0/checkpoint_sevennet_mf_0.pth' -SEVENNET_MF_OMPA = f'{_prefix}/SevenNet_MF_OMPA/checkpoint_sevennet_mf_ompa.pth' -SEVENNET_OMAT = f'{_prefix}/SevenNet_OMAT/checkpoint_sevennet_omat.pth' +SEVENNET_MF_ompa = f'{_prefix}/SevenNet_MF_OMPA/checkpoint_sevennet_mf_ompa.pth' +SEVENNET_omat = f'{_prefix}/SevenNet_OMAT/checkpoint_sevennet_omat.pth' -SEVENNET_DOWNLOAD_LINK = { - SEVENNET_MF_OMPA: 'https://figshare.com/ndownloader/files/53015138', - SEVENNET_OMAT: 'https://figshare.com/ndownloader/files/53015129', +_git_prefix = 'https://github.com/MDIL-SNU/SevenNet/releases/download' +CHECKPOINT_DOWNLOAD_LINKS = { + SEVENNET_MF_ompa: f'{_git_prefix}/v0.11.0.cp/checkpoint_sevennet_mf_ompa.pth', + SEVENNET_omat: f'{_git_prefix}/v0.11.0.cp/checkpoint_sevennet_omat.pth', } # to avoid torch script to compile torch_geometry.data AtomGraphDataType = Dict[str, torch.Tensor] diff --git a/sevenn/util.py b/sevenn/util.py index d50063ae..f8acfdf8 100644 --- a/sevenn/util.py +++ b/sevenn/util.py @@ -1,17 +1,19 @@ import os +import os.path as osp import pathlib import shutil -import tempfile -import urllib.error -import urllib.request +import sys +from pathlib import Path from typing import Dict, List, Tuple, Union import numpy as 
np +import requests import torch import torch.nn from e3nn.o3 import FullTensorProduct, Irreps +from tqdm import tqdm -import sevenn._const as CONST +import sevenn._const as _const import sevenn._keys as KEY from sevenn.checkpoint import SevenNetCheckpoint @@ -190,91 +192,103 @@ def infer_irreps_out( return Irreps(new_irreps_elem) +def download_checkpoint(path: str, url: str): + # raises permission error if fails + fname = osp.basename(path) + os.makedirs(osp.dirname(path), exist_ok=True) + temp_path = path + '.partial' + try: + response = requests.get(url, stream=True, timeout=30) + response.raise_for_status() # Raise exception for bad status codes + + total_size = int(response.headers.get('content-length', 0)) + block_size = 1024 # 1 KB chunks + + progress_bar = tqdm( + total=total_size, + unit='B', + unit_scale=True, + desc=f'Downloading {fname}', + ) + + with open(temp_path, 'wb') as file: + for data in response.iter_content(block_size): + progress_bar.update(len(data)) + file.write(data) + progress_bar.close() + + shutil.move(temp_path, path) + print(f'Checkpoint downloaded: {path}') + return path + except Exception as e: + # Clean up partial downloads on failure + print(f'Download failed: {str(e)}') + if os.path.exists(temp_path): + print(f'Cleaning up partial download: {temp_path}') + os.remove(temp_path) + raise + + def pretrained_name_to_path(name: str) -> str: name = name.lower() heads = ['sevennet', '7net'] checkpoint_path = None + url = None + if ( # TODO: regex name in [f'{n}-0_11july2024' for n in heads] or name in [f'{n}-0_11jul2024' for n in heads] or name in ['sevennet-0', '7net-0'] ): - checkpoint_path = CONST.SEVENNET_0_11Jul2024 + checkpoint_path = _const.SEVENNET_0_11Jul2024 elif name in [f'{n}-0_22may2024' for n in heads]: - checkpoint_path = CONST.SEVENNET_0_22May2024 + checkpoint_path = _const.SEVENNET_0_22May2024 elif name in [f'{n}-l3i5' for n in heads]: - checkpoint_path = CONST.SEVENNET_l3i5 + checkpoint_path = _const.SEVENNET_l3i5 elif name in [f'{n}-mf-0' for n in heads]: - checkpoint_path = CONST.SEVENNET_MF_0 + checkpoint_path = _const.SEVENNET_MF_0 elif name in [f'{n}-mf-ompa' for n in heads]: - checkpoint_path = CONST.SEVENNET_MF_OMPA + checkpoint_path = _const.SEVENNET_MF_ompa elif name in [f'{n}-omat' for n in heads]: - checkpoint_path = CONST.SEVENNET_OMAT + checkpoint_path = _const.SEVENNET_omat else: - raise ValueError('Not a valid potential') + raise ValueError('Not a valid pretrained model name') + url = _const.CHECKPOINT_DOWNLOAD_LINKS.get(checkpoint_path) - checkpoint_path = check_and_download_checkpoint(checkpoint_path) + paths = [ + checkpoint_path, + checkpoint_path.replace(_const._prefix, osp.expanduser('~/.cache/sevennet')), + ] - return checkpoint_path + for path in paths: + if osp.exists(path): + return path + + # File not found check url and try download + if url is None: + raise FileNotFoundError(checkpoint_path) + + try: + return download_checkpoint(paths[0], url) + except PermissionError: + return download_checkpoint(paths[1], url) def load_checkpoint(checkpoint: Union[pathlib.Path, str]): - if os.path.isfile(checkpoint): + suggests = ['7net-0, 7net-l3i5, 7net-mf-ompa, 7net-omat'] + if osp.isfile(checkpoint): checkpoint_path = checkpoint else: try: checkpoint_path = pretrained_name_to_path(str(checkpoint)) except ValueError: raise ValueError( - f'Given {checkpoint} is not exists and not a pre-trained name' + f'Given {checkpoint} is not exists and not a pre-trained name.\n' + f'Valid pretrained model names: {suggests}' ) return 
SevenNetCheckpoint(checkpoint_path) -def check_and_download_checkpoint(checkpoint_path: str): - # check if the file exists - if os.path.isfile(checkpoint_path): - return checkpoint_path - model_name = os.path.basename(os.path.dirname(checkpoint_path)) - home_save_path = os.path.expanduser(f'~/.cache/{model_name}') - checkpoint_path2 = os.path.join( - home_save_path, os.path.basename(checkpoint_path) - ) - if os.path.isfile(checkpoint_path2): - return checkpoint_path2 - - # download the file - download_url = CONST.SEVENNET_DOWNLOAD_LINK.get(checkpoint_path) - print(f'Downloading {model_name} checkpoint', flush=True) - try: - save_path = os.path.dirname(checkpoint_path) - os.makedirs(save_path, exist_ok=True) - except Exception: - save_path = home_save_path - os.makedirs(save_path, exist_ok=True) - checkpoint_path = checkpoint_path2 - - print(f'Saving to {save_path}', flush=True) - with tempfile.NamedTemporaryFile(delete=False, dir=save_path) as temp_file: - temp_path = temp_file.name - try: - _, http_msg = urllib.request.urlretrieve(download_url, temp_path) - print(f'Download complete to {save_path}', flush=True) - shutil.move(temp_path, checkpoint_path) - except ( - urllib.error.URLError, - urllib.error.HTTPError, - OSError, - shutil.Error, - KeyboardInterrupt, - ) as e: - raise ValueError(f'Failed to download {model_name} checkpoint: {e}') - finally: - if os.path.isfile(temp_path): - os.remove(temp_path) - return checkpoint_path - - def unique_filepath(filepath: str) -> str: if not os.path.isfile(filepath): return filepath From 9aee18f4b09f9990264fcbd05ff8ab4b38e6e34d Mon Sep 17 00:00:00 2001 From: YutackPark Date: Sun, 16 Mar 2025 15:53:55 +0900 Subject: [PATCH 06/13] refactor --- sevenn/util.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sevenn/util.py b/sevenn/util.py index f8acfdf8..8f2a2aa8 100644 --- a/sevenn/util.py +++ b/sevenn/util.py @@ -195,9 +195,9 @@ def infer_irreps_out( def download_checkpoint(path: str, url: str): # raises permission error if fails fname = osp.basename(path) - os.makedirs(osp.dirname(path), exist_ok=True) temp_path = path + '.partial' try: + os.makedirs(osp.dirname(path), exist_ok=True) response = requests.get(url, stream=True, timeout=30) response.raise_for_status() # Raise exception for bad status codes @@ -220,6 +220,8 @@ def download_checkpoint(path: str, url: str): shutil.move(temp_path, path) print(f'Checkpoint downloaded: {path}') return path + except PermissionError: + raise except Exception as e: # Clean up partial downloads on failure print(f'Download failed: {str(e)}') @@ -269,9 +271,9 @@ def pretrained_name_to_path(name: str) -> str: raise FileNotFoundError(checkpoint_path) try: - return download_checkpoint(paths[0], url) + return download_checkpoint(paths[0], url) # 7net package path except PermissionError: - return download_checkpoint(paths[1], url) + return download_checkpoint(paths[1], url) # ~/.cache def load_checkpoint(checkpoint: Union[pathlib.Path, str]): From 0e42e89a877209815fffbba28603ea5e8a54679a Mon Sep 17 00:00:00 2001 From: YutackPark Date: Sun, 16 Mar 2025 15:58:15 +0900 Subject: [PATCH 07/13] docs: changelog --- CHANGELOG.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03b15de8..6713564b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,20 +3,19 @@ All notable changes to this project will be documented in this file. 
## [0.11.0] + +Multi-fidelity learning implemented & New pretrained-models + ### Added - Build multi-fidelity model, SevenNet-MF, based on given modality in the yaml - Modality support for sevenn_inference, sevenn_get_modal, and SevenNetCalculator -- [cli] sevenn_cp tool for checkpoint summary, input generation, multi-modal routines +- sevenn_cp tool for checkpoint summary, input generation, multi-modal routines - Modality append / assign using sevenn_cp - Loss weighting for energy, force and stress for corresponding data label - Ignore unlabelled data when calculating loss. (e.g. stress data for non-pbc structure) - Dict style dataset input for multi-modal and data-weight - (experimental) cuEquivariance support - -### Added (code) -- sevenn.train.modal_dataset SevenNetMultiModalDataset -- sevenn.scripts.backward_compatibility.py -- sevenn.checkpoint.py +- Downloading large checkpoints from url (7net-MF-ompa, 7net-omat) - D3 wB97M param ### Changed From c43c640e7131f9f3d5cffd0005a0e198c7546a97 Mon Sep 17 00:00:00 2001 From: hexagonrose Date: Sun, 16 Mar 2025 16:27:18 +0900 Subject: [PATCH 08/13] docs: write the informations about new models on readme --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index b19707d7..950f7a71 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,24 @@ Additionally, `keywords` can be called in other parts of SevenNet, such as `seve **Acknowledgments**: The models trained on [`MPtrj`](https://figshare.com/articles/dataset/Materials_Project_Trjectory_MPtrj_Dataset/23713842) were supported by the Neural Processing Research Center program of Samsung Advanced Institute of Technology, Samsung Electronics Co., Ltd. The computations for training models were carried out using the Samsung SSC-21 cluster. +--- + +### **SevenNet-MF-ompa (16Mar2025)** +> Keywords in ASE: `7net-mf-ompa` and `SevenNet-mf-ompa` + +This model utilizes multi-fidelity learning to simultaneously train on the MPtrj, sAlex, and OMat24 datasets. This is the best-balanced model that performs well in both Matbench F1 score and thermal conductivity calculations. Since it is a multi-fidelity model, it can calculate results corresponding to both PBE52 (MPtrj, sAlex) and PBE54 (OMat24). +* Training set MAE: 11.0 meV/atom (energy), 0.053 eV/Ang. (force), and 4.84 kbar (stress) +* Matbench F1 score: 0.901, $\kappa_{\mathrm{SRME}}$: 0.314 --- +### **SevenNet-omat (16Mar2025)** +> Keywords in ASE: `7net-omat` and `SevenNet-omat` + This model was trained solely on OMat24 dataset. Due to the POTCAR version difference between the OMat24 data and the MPtrj data, there is no Matbench f1 score, but this model exhibits SOTA performance in thermal conductivity calculations. + +* Training set MAE: 14.2 meV/atom (energy), 0.072 eV/Ang. 
(force), and 6.36 kbar (stress) +* $\kappa_{\mathrm{SRME}}$: 0.221 +--- ### **SevenNet-l3i5 (12Dec2024)** > Keywords in ASE: `7net-l3i5` and `SevenNet-l3i5` From 728405cac1ea005f1460f8f5b7a133aa0f0c83bf Mon Sep 17 00:00:00 2001 From: hexagonrose Date: Mon, 17 Mar 2025 00:24:56 +0900 Subject: [PATCH 09/13] test: fix mf_ompa test in test_pretrained --- tests/unit_tests/test_pretrained.py | 32 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/unit_tests/test_pretrained.py b/tests/unit_tests/test_pretrained.py index dd198613..4676ca47 100644 --- a/tests/unit_tests/test_pretrained.py +++ b/tests/unit_tests/test_pretrained.py @@ -228,24 +228,24 @@ def test_7net_mf_ompa_mpa(atoms_pbc, atoms_mol): model.set_is_batch_data(True) - g1_ref_e = torch.tensor([-3.49159574508667]) + g1_ref_e = torch.tensor([-3.490943193435669]) g1_ref_f = torch.tensor( [ - [1.2676645e01, -9.1320835e-04, -9.1319904e-04], - [-1.2676647e01, 9.1319904e-04, 9.1320090e-04], + [1.2680445e01, -2.7985498e-04, -2.7979910e-04], + [-1.2680446e01, 2.7984008e-04, 2.7981028e-04], ] ) g1_ref_s = -1 * torch.tensor( # xx, yy, zz, xy, yz, zx - [-0.6478229, -0.02454099, -0.02454098, 0.02695589, 0.00459683, 0.02695588] + [-0.6481662, -0.02462837, -0.02462837, 0.02693467, 0.00459635, 0.02693467] ) - g2_ref_e = torch.tensor([-12.59517765045166]) + g2_ref_e = torch.tensor([-12.597525596618652]) g2_ref_f = torch.tensor( [ - [0.0, -12.236995, 7.270792], - [0.0, 8.810955, -9.422472], - [0.0, 3.4260397, 2.15168], + [0.0, -12.245223, 7.26795], + [0.0, 8.816763, -9.423925], + [0.0, 3.4284601, 2.1559749], ] ) assert acl(g1.inferred_total_energy, g1_ref_e) @@ -274,24 +274,24 @@ def test_7net_mf_ompa_omat(atoms_pbc, atoms_mol): model.set_is_batch_data(True) - g1_ref_e = torch.tensor([-3.5113704204559326]) + g1_ref_e = torch.tensor([-3.5094668865203857]) g1_ref_f = torch.tensor( [ - [1.2555956e01, -2.1019224e-03, -2.1019187e-03], - [-1.2555954e01, 2.1019280e-03, 2.1019522e-03], + [1.2562084e01, -1.4219694e-03, -1.4219843e-03], + [-1.2562084e01, 1.4219508e-03, 1.4219955e-03], ] ) g1_ref_s = -1 * torch.tensor( # xx, yy, zz, xy, yz, zx - [-0.6425987, -0.02529771, -0.0252977, 0.02685493, 0.00460126, 0.02685493] + [-0.6430905, -0.0254128, -0.02541281, 0.0268343, 0.00460021, 0.0268343] ) - g2_ref_e = torch.tensor([-12.618719100952148]) + g2_ref_e = torch.tensor([-12.6202974319458]) g2_ref_f = torch.tensor( [ - [0.0, -12.197467, 7.2081957], - [0.0, 8.784865, -9.367334], - [0.0, 3.412602, 2.1591387], + [0.0, -12.205926, 7.2050343], + [0.0, 8.790399, -9.368677], + [0.0, 3.4155273, 2.163643], ] ) assert acl(g1.inferred_total_energy, g1_ref_e) From e8a2d2b16c2bb15b192f43496025239d823a875f Mon Sep 17 00:00:00 2001 From: hexagonrose Date: Mon, 17 Mar 2025 12:56:48 +0900 Subject: [PATCH 10/13] docs: revise model descriptions in README --- README.md | 67 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 950f7a71..79b411d3 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ SevenNet (Scalable EquiVariance Enabled Neural Network) is a graph neural networ ## Pre-trained models So far, we have released three pre-trained SevenNet models. Each model has various hyperparameters and training sets, resulting in different accuracy and speed. Please read the descriptions below carefully and choose the model that best suits your purpose. 
-We provide the training set MAEs (energy, force, and stress) F1 score for WBM dataset and $\kappa_{\mathrm{SRME}}$ from phonondb. For details on these metrics and performance comparisons with other pre-trained models, please visit [Matbench Discovery](https://matbench-discovery.materialsproject.org/). +We provide the training set MAEs (energy, force, and stress) F1 score, and RMSD for the WBM dataset, as well as $\kappa_{\mathrm{SRME}}$ from phonondb and CPS (Combined Performance Score). For details on these metrics and performance comparisons with other pre-trained models, please visit [Matbench Discovery](https://matbench-discovery.materialsproject.org/). These models can be used as interatomic potential on LAMMPS, and also can be loaded through ASE calculator by calling the `keywords` of each model. Please refer [ASE calculator](#ase_calculator) to see the way to load a model through ASE calculator. Additionally, `keywords` can be called in other parts of SevenNet, such as `sevenn_inference`, `sevenn_get_model`, and `checkpoint` in `input.yaml` for fine-tuning. @@ -28,31 +28,55 @@ Additionally, `keywords` can be called in other parts of SevenNet, such as `seve --- -### **SevenNet-MF-ompa (16Mar2025)** +### **SevenNet-MF-ompa (17Mar2025)** > Keywords in ASE: `7net-mf-ompa` and `SevenNet-mf-ompa` -This model utilizes multi-fidelity learning to simultaneously train on the MPtrj, sAlex, and OMat24 datasets. This is the best-balanced model that performs well in both Matbench F1 score and thermal conductivity calculations. Since it is a multi-fidelity model, it can calculate results corresponding to both PBE52 (MPtrj, sAlex) and PBE54 (OMat24). +This model leverages multi-fidelity learning to simultaneously train on the [MPtrj](https://figshare.com/articles/dataset/Materials_Project_Trjectory_MPtrj_Dataset/23713842), [sAlex](https://huggingface.co/datasets/fairchem/OMAT24), and [OMat24](https://huggingface.co/datasets/fairchem/OMAT24) datasets. Although its accuracy in **molecular energy** calculations is lower, it outperforms the existing l3i5 model in other computational tasks. This model achieves high accuracy in energy and force calculations and, as of March 17, 2025, has achieved state-of-the-art (SOTA) performance on the CPS metric, newly introduced in this [Matbench Discovery](https://matbench-discovery.materialsproject.org/).
+To use this model, you must specify the `modal` argument: use `'mpa'` for PBE52 calculations and `'omat24'` for PBE54 calculations. Detailed usage instructions for the multi-fidelity (MF) model are available at this [link](https://github.com/MDIL-SNU/SevenNet/blob/main/sevenn/pretrained_potentials/SevenNet_MF_0/README.md).
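+For example, the modality can be selected through the [ASE calculator](#ase_calculator); a minimal sketch:
+
+```python
+from sevenn.calculator import SevenNetCalculator
+
+# 'modal' selects the fidelity: 'mpa' (MPtrj + sAlex, PBE52) or 'omat24' (OMat24, PBE54).
+calc = SevenNetCalculator(model='7net-mf-ompa', modal='mpa')
+```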
+The checkpoint available for download via GitHub has been lightweighted by removing some information. The full-information checkpoint can be downloaded through this [link](https://figshare.com/articles/software/7net_MF_ompa/28590722?file=53029859). -* Training set MAE: 11.0 meV/atom (energy), 0.053 eV/Ang. (force), and 4.84 kbar (stress) -* Matbench F1 score: 0.901, $\kappa_{\mathrm{SRME}}$: 0.314 +#### **Training set MAE** +| Energy (meV/atom) | Force (eV/Å) | Stress (kbar) | +|:---:|:---:|:---:| +|11.2|0.053|4.82| + +#### **Matbench Discovery** +| CPS | F1 | $\kappa_{\mathrm{SRME}}$ | RMSD | +|:---:|:---:|:---:|:---:| +|0.883|0.901|0.317| 0.0115 | --- -### **SevenNet-omat (16Mar2025)** +### **SevenNet-omat (17Mar2025)** > Keywords in ASE: `7net-omat` and `SevenNet-omat` - This model was trained solely on OMat24 dataset. Due to the POTCAR version difference between the OMat24 data and the MPtrj data, there is no Matbench f1 score, but this model exhibits SOTA performance in thermal conductivity calculations. + This model was trained solely on [OMat24](https://huggingface.co/datasets/fairchem/OMAT24) dataset. Although its accuracy in **molecular energy** calculations is lower, it outperforms the existing l3i5 model in other computational tasks. Due to the POTCAR version differences between the [MPtrj](https://figshare.com/articles/dataset/Materials_Project_Trjectory_MPtrj_Dataset/23713842), and [OMat24](https://huggingface.co/datasets/fairchem/OMAT24) datasets, no Matbench F1 score is available; however, this model shows state-of-the-art (SOTA) performance in thermal conductivity calculations.
The checkpoint available for download via GitHub has been lightweighted by removing some information. The full-information checkpoint can be downloaded through this [link](https://figshare.com/articles/software/SevenNet_omat/28593938). + +#### **Training set MAE** +| Energy (meV/atom) | Force (eV/Å) | Stress (kbar) | +|:---:|:---:|:---:| +|14.9|0.073|6.53| -* Training set MAE: 14.2 meV/atom (energy), 0.072 eV/Ang. (force), and 6.36 kbar (stress) +#### **Matbench Discovery** * $\kappa_{\mathrm{SRME}}$: 0.221 --- ### **SevenNet-l3i5 (12Dec2024)** > Keywords in ASE: `7net-l3i5` and `SevenNet-l3i5` -The model increases the maximum spherical harmonic degree ($l_{\mathrm{max}}$) to 3, compared to **SevenNet-0 (11Jul2024)** with $l_{\mathrm{max}}$ of 2. -While **l3i5** offers improved accuracy across various systems compared to **SevenNet-0 (11Jul2024)**, it is approximately four times slower. +The model increases the maximum spherical harmonic degree ($l_{\mathrm{max}}$) to 3, compared to **SevenNet-0 (11Jul2024)** with $l_{\mathrm{max}}$ of 2. While **l3i5** offers improved accuracy across various systems compared to **SevenNet-0 (11Jul2024)**, it is approximately four times slower. As of March 17, 2025, this model has achieved state-of-the-art (SOTA) performance on the CPS metric among compliant models, newly introduced in this [Matbench Discovery](https://matbench-discovery.materialsproject.org/). + +#### **Training set MAE** +| Energy (meV/atom) | Force (eV/Å) | Stress (kbar) | +|:---:|:---:|:---:| +|8.3|0.029|2.33| + +#### **Matbench Discovery** +| CPS | F1 | $\kappa_{\mathrm{SRME}}$ | RMSD | +|:---:|:---:|:---:|:---:| +|0.764 |0.76|0.55|0.0182| + + +#### **Training time** +381 GPU-days on A100 -* Training set MAE: 8.3 meV/atom (energy), 0.029 eV/Ang. (force), and 2.33 kbar (stress) -* Matbench F1 score: 0.76, $\kappa_{\mathrm{SRME}}$: 0.560 -* Training time: 381 GPU-days on A100 --- ### **SevenNet-0 (11Jul2024)** @@ -65,9 +89,19 @@ The model was trained with [MPtrj](https://figshare.com/articles/dataset/Materia This model is loaded as the default pre-trained model in ASE calculator. For more information, click [here](sevenn/pretrained_potentials/SevenNet_0__11Jul2024). -* Training set MAE: 11.5 meV/atom (energy), 0.041 eV/Ang. (force), and 2.78 kbar (stress) -* Matbench F1 score: 0.67, $\kappa_{\mathrm{SRME}}$: 0.767 -* Training time: 90 GPU-days on A100 +#### **Training set MAE** +| Energy (meV/atom) | Force (eV/Å) | Stress (kbar) | +|:---:|:---:|:---:| +|11.5|0.041|2.78| + +#### **Matbench Discovery** +| F1 | $\kappa_{\mathrm{SRME}}$ | +|:---:|:---:| +|0.67|0.767| + +#### **Training time** +90 GPU-days on A100 + --- In addition to these latest models, you can find our legacy models from [pretrained_potentials](./sevenn/pretrained_potentials). @@ -121,7 +155,6 @@ The model can be loaded through the following Python code. from sevenn.calculator import SevenNetCalculator calc = SevenNetCalculator(model='7net-0', device='cpu') ``` - SevenNet supports CUDA accelerated D3Calculator. 
```python from sevenn.calculator import SevenNetD3Calculator From 8bd75fe2bac3b90492e3ea8608ac50be4f8fd10f Mon Sep 17 00:00:00 2001 From: YutackPark Date: Mon, 17 Mar 2025 12:59:11 +0900 Subject: [PATCH 11/13] refactor --- sevenn/_const.py | 4 +- sevenn/calculator.py | 93 ++++++++++++++++++++++++++++++-------------- sevenn/util.py | 5 +-- 3 files changed, 68 insertions(+), 34 deletions(-) diff --git a/sevenn/_const.py b/sevenn/_const.py index 2d753d04..40351d3c 100644 --- a/sevenn/_const.py +++ b/sevenn/_const.py @@ -52,8 +52,8 @@ SEVENNET_0_22May2024 = f'{_prefix}/SevenNet_0__22May2024/checkpoint_sevennet_0.pth' SEVENNET_l3i5 = f'{_prefix}/SevenNet_l3i5/checkpoint_l3i5.pth' SEVENNET_MF_0 = f'{_prefix}/SevenNet_MF_0/checkpoint_sevennet_mf_0.pth' -SEVENNET_MF_ompa = f'{_prefix}/SevenNet_MF_OMPA/checkpoint_sevennet_mf_ompa.pth' -SEVENNET_omat = f'{_prefix}/SevenNet_OMAT/checkpoint_sevennet_omat.pth' +SEVENNET_MF_ompa = f'{_prefix}/SevenNet_MF_ompa/checkpoint_sevennet_mf_ompa.pth' +SEVENNET_omat = f'{_prefix}/SevenNet_omat/checkpoint_sevennet_omat.pth' _git_prefix = 'https://github.com/MDIL-SNU/SevenNet/releases/download' CHECKPOINT_DOWNLOAD_LINKS = { diff --git a/sevenn/calculator.py b/sevenn/calculator.py index e22e2d46..8004d82e 100644 --- a/sevenn/calculator.py +++ b/sevenn/calculator.py @@ -2,7 +2,7 @@ import os import pathlib import warnings -from typing import Any, Optional, Union +from typing import Any, Dict, Optional, Union import numpy as np import torch @@ -22,17 +22,12 @@ class SevenNetCalculator(Calculator): - """ASE calculator for SevenNet models + """Supporting properties: + 'free_energy', 'energy', 'forces', 'stress', 'energies' + free_energy equals energy. 'energies' stores atomic energy. - Multi-GPU parallel MD is not supported for this mode. - Use LAMMPS for multi-GPU parallel MD. - This class is for convenience who want to run SevenNet models with ase. - - Note than ASE calculator is designed to be interface of other programs. - But in this class, we simply run torch model inside ASE calculator. - So there is no FileIO things. - - Here, free_energy = energy + Multi-GPU acceleration is not supported with ASE calculator. + You should use LAMMPS for the acceleration. """ def __init__( @@ -42,14 +37,28 @@ def __init__( device: Union[torch.device, str] = 'auto', modal: Optional[str] = None, enable_cueq: bool = False, - sevennet_config: Optional[Any] = None, # hold meta information + sevennet_config: Optional[Dict] = None, # Not used in logic, just meta info **kwargs, ): - """Initialize the calculator - - Args: - model (SevenNet): path to the checkpoint file, or pretrained - device (str, optional): Torch device to use. Defaults to "auto". + """Initialize SevenNetCalculator. + + Parameters + ---------- + model: str | Path | AtomGraphSequential, default='7net-0' + Name of pretrained models (7net-mf-ompa, 7net-omat, 7net-l3i5, 7net-0) or + path to the checkpoint, deployed model or the model itself + file_type: str, default='checkpoint' + one of 'checkpoint' | 'torchscript' | 'model_instance' + device: str | torch.device, default='auto' + if not given, use CUDA if available + modal: str | None, default=None + modal (fidelity) if given model is multi-modal model. for 7net-mf-ompa, + it should be one of 'mpa' (MPtrj + sAlex) or 'omat24' (OMat24) + case insensitive + enable_cueq: bool, default=False + if True, use cuEquivariant to accelerate inference. 
+ sevennet_config: dict | None, default=None + Not used, but can be used to carry meta information of this calculator """ super().__init__(**kwargs) self.sevennet_config = None @@ -131,18 +140,21 @@ def __init__( self.model = model_loaded - if isinstance(self.model, AtomGraphSequential) and modal: - if self.model.modal_map is None: - raise ValueError('Modality given, but model has no modal_map') - if modal not in self.model.modal_map: - _modals = list(self.model.modal_map.keys()) - raise ValueError(f'Unknown modal {modal} (not in {_modals})') + self.modal = None + if isinstance(self.model, AtomGraphSequential): + modal_map = self.model.modal_map + if modal_map: + modal_ava = list(modal_map.keys()) + if not modal: + raise ValueError(f'modal argument missing (avail: {modal_ava})') + elif modal not in modal_ava: + raise ValueError(f'unknown modal {modal} (not in {modal_ava})') + self.modal = modal + elif not self.model.modal_map and modal: + warnings.warn(f'modal={modal} is ignored as model has no modal_map') self.model.to(self.device) self.model.eval() - - self.modal = modal - self.implemented_properties = [ 'free_energy', 'energy', @@ -216,6 +228,31 @@ def __init__( cn_cutoff: float = 1600, # au^2, 0.52917726 angstrom = 1 au **kwargs, ): + """Initialize SevenNetD3Calculator. CUDA required. + + Parameters + ---------- + model: str | Path | AtomGraphSequential + Name of pretrained models (7net-mf-ompa, 7net-omat, 7net-l3i5, 7net-0) or + path to the checkpoint, deployed model or the model itself + file_type: str, default='checkpoint' + one of 'checkpoint' | 'torchscript' | 'model_instance' + device: str | torch.device, default='auto' + if not given, use CUDA if available + modal: str | None, default=None + modal (fidelity) if given model is multi-modal model. for 7net-mf-ompa, + it should be one of 'mpa' (MPtrj + sAlex) or 'omat24' (OMat24) + enable_cueq: bool, default=False + if True, use cuEquivariant to accelerate inference. + damping_type: str, default='damp_bj' + Damping type of D3, one of 'damp_bj' | 'damp_zero' + functional_name: str, default='pbe' + Target functional name of D3 parameters. 
+ vdw_cutoff: float, default=9000 + vdw cutoff of D3 calculator in au + cn_cutoff: float, default=1600 + cn cutoff of D3 calculator in au + """ d3_calc = D3Calculator( damping_type=damping_type, functional_name=functional_name, @@ -267,9 +304,7 @@ def _load(name: str) -> ctypes.CDLL: load( name=name, - sources=[ - os.path.join(package_dir, 'pair_e3gnn', 'pair_d3_for_ase.cu') - ], + sources=[os.path.join(package_dir, 'pair_e3gnn', 'pair_d3_for_ase.cu')], extra_cuda_cflags=['-O3', '--expt-relaxed-constexpr', '-fmad=false'], build_directory=compile_dir, verbose=True, diff --git a/sevenn/util.py b/sevenn/util.py index 8f2a2aa8..2077f431 100644 --- a/sevenn/util.py +++ b/sevenn/util.py @@ -2,8 +2,6 @@ import os.path as osp import pathlib import shutil -import sys -from pathlib import Path from typing import Dict, List, Tuple, Union import numpy as np @@ -193,10 +191,10 @@ def infer_irreps_out( def download_checkpoint(path: str, url: str): - # raises permission error if fails fname = osp.basename(path) temp_path = path + '.partial' try: + # raises permission error if fails os.makedirs(osp.dirname(path), exist_ok=True) response = requests.get(url, stream=True, timeout=30) response.raise_for_status() # Raise exception for bad status codes @@ -224,6 +222,7 @@ def download_checkpoint(path: str, url: str): raise except Exception as e: # Clean up partial downloads on failure + # May not work as errors handled internally by tqdm etc. print(f'Download failed: {str(e)}') if os.path.exists(temp_path): print(f'Cleaning up partial download: {temp_path}') From 0ee94027f3ae642d6ec00c4a765634d4eda2547e Mon Sep 17 00:00:00 2001 From: Yutack Park <111348843+YutackPark@users.noreply.github.com> Date: Mon, 17 Mar 2025 14:09:49 +0900 Subject: [PATCH 12/13] Update README.md --- README.md | 68 +++++++++++++++++++++++-------------------------------- 1 file changed, 28 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 79b411d3..52ed5261 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,8 @@ SevenNet (Scalable EquiVariance Enabled Neural Network) is a graph neural network (GNN) interatomic potential package that supports parallel molecular dynamics simulations with [`LAMMPS`](https://lammps.org). Its underlying GNN model is based on [`NequIP`](https://github.com/mir-group/nequip). -> [!CAUTION] -> SevenNet+LAMMPS parallel after the commit id of `14851ef (v0.9.3 ~ 0.9.5)` has a serious bug. -> It gives wrong forces when the number of mpi processes is greater than two. The corresponding pip version is yanked for this reason. The bug is fixed for the main branch since `v0.10.0`, and pip (`v0.9.3.post0`). - +> [!NOTE] +> We will soon release a CUDA-accelerated version of SevenNet, which will significantly increase the speed of our pre-trained models on [Matbench Discovery](https://matbench-discovery.materialsproject.org/). ## Features - Pre-trained GNN interatomic potential and fine-tuning interface. 
@@ -29,58 +27,56 @@ Additionally, `keywords` can be called in other parts of SevenNet, such as `seve --- ### **SevenNet-MF-ompa (17Mar2025)** -> Keywords in ASE: `7net-mf-ompa` and `SevenNet-mf-ompa` +> Model keywords: `7net-mf-ompa` | `SevenNet-mf-ompa` + +**This is our recommended pre-trained model** + +This model leverages [multi-fidelity learning](https://pubs.acs.org/doi/10.1021/jacs.4c14455) to simultaneously train on the [MPtrj](https://figshare.com/articles/dataset/Materials_Project_Trjectory_MPtrj_Dataset/23713842), [sAlex](https://huggingface.co/datasets/fairchem/OMAT24), and [OMat24](https://huggingface.co/datasets/fairchem/OMAT24) datasets. As of March 17, 2025, it has achieved state-of-the-art performance on the [Matbench Discovery](https://matbench-discovery.materialsproject.org/) in the CPS (Combined Performance Score). We have found that this model outperforms most tasks, except for isolated molecule energy, where it performs slightly worse than SevenNet-l3i5. + +```python +from sevenn.calculator import SevenNetCalculator +# "mpa" refers to the MPtrj + sAlex modal, used for evaluating Matbench Discovery. +calc = SevenNetCalculator('7net-mf-ompa', modal='mpa') # Use modal='omat24' for OMat24-trained modal weights. +``` +Theoretically, the `mpa` modal should produce PBE52 results, while the `omat24` modal yields PBE54 results. -This model leverages multi-fidelity learning to simultaneously train on the [MPtrj](https://figshare.com/articles/dataset/Materials_Project_Trjectory_MPtrj_Dataset/23713842), [sAlex](https://huggingface.co/datasets/fairchem/OMAT24), and [OMat24](https://huggingface.co/datasets/fairchem/OMAT24) datasets. Although its accuracy in **molecular energy** calculations is lower, it outperforms the existing l3i5 model in other computational tasks. This model achieves high accuracy in energy and force calculations and, as of March 17, 2025, has achieved state-of-the-art (SOTA) performance on the CPS metric, newly introduced in this [Matbench Discovery](https://matbench-discovery.materialsproject.org/).
-To use this model, you should specify the `modal` argument. You can use `'mpa'` for PBE52 calculations, and `'omat24'` for PBE54 calculations. Detailed usage instructions for the multi-fidelity (MF) model are available at this [link](https://github.com/MDIL-SNU/SevenNet/blob/main/sevenn/pretrained_potentials/SevenNet_MF_0/README.md).
-The checkpoint available for download via GitHub has been lightweighted by removing some information. The full-information checkpoint can be downloaded through this [link](https://figshare.com/articles/software/7net_MF_ompa/28590722?file=53029859). +When using the command-line interface of SevenNet, include the `--modal mpa` or `--modal omat24` option to select the desired modality. -#### **Training set MAE** -| Energy (meV/atom) | Force (eV/Å) | Stress (kbar) | -|:---:|:---:|:---:| -|11.2|0.053|4.82| #### **Matbench Discovery** | CPS | F1 | $\kappa_{\mathrm{SRME}}$ | RMSD | |:---:|:---:|:---:|:---:| -|0.883|0.901|0.317| 0.0115 | +|**0.883**|**0.901**|0.317| **0.0115** | + +[Detailed instructions for multi-fidelity](https://github.com/MDIL-SNU/SevenNet/blob/main/sevenn/pretrained_potentials/SevenNet_MF_0/README.md) + +[Link to the full-information checkpoint](https://figshare.com/articles/software/7net_MF_ompa/28590722?file=53029859) + --- ### **SevenNet-omat (17Mar2025)** -> Keywords in ASE: `7net-omat` and `SevenNet-omat` +> Model keywords: `7net-omat` | `SevenNet-omat` - This model was trained solely on [OMat24](https://huggingface.co/datasets/fairchem/OMAT24) dataset. Although its accuracy in **molecular energy** calculations is lower, it outperforms the existing l3i5 model in other computational tasks. Due to the POTCAR version differences between the [MPtrj](https://figshare.com/articles/dataset/Materials_Project_Trjectory_MPtrj_Dataset/23713842), and [OMat24](https://huggingface.co/datasets/fairchem/OMAT24) datasets, no Matbench F1 score is available; however, this model shows state-of-the-art (SOTA) performance in thermal conductivity calculations.
The checkpoint available for download via GitHub has been lightweighted by removing some information. The full-information checkpoint can be downloaded through this [link](https://figshare.com/articles/software/SevenNet_omat/28593938). + This model was trained solely on the [OMat24](https://huggingface.co/datasets/fairchem/OMAT24) dataset. It achieves state-of-the-art (SOTA) performance in $\kappa_{\mathrm{SRME}}$ on [Matbench Discovery](https://matbench-discovery.materialsproject.org/); however, the F1 score was not available due to a difference in the POTCAR version. Similar to `SevenNet-MF-ompa`, this model outperforms `SevenNet-l3i5` in most tasks, except for isolated molecule energy. -#### **Training set MAE** -| Energy (meV/atom) | Force (eV/Å) | Stress (kbar) | -|:---:|:---:|:---:| -|14.9|0.073|6.53| +[Link to the full-information checkpoint](https://figshare.com/articles/software/SevenNet_omat/28593938). #### **Matbench Discovery** -* $\kappa_{\mathrm{SRME}}$: 0.221 +* $\kappa_{\mathrm{SRME}}$: **0.221** --- ### **SevenNet-l3i5 (12Dec2024)** -> Keywords in ASE: `7net-l3i5` and `SevenNet-l3i5` - -The model increases the maximum spherical harmonic degree ($l_{\mathrm{max}}$) to 3, compared to **SevenNet-0 (11Jul2024)** with $l_{\mathrm{max}}$ of 2. While **l3i5** offers improved accuracy across various systems compared to **SevenNet-0 (11Jul2024)**, it is approximately four times slower. As of March 17, 2025, this model has achieved state-of-the-art (SOTA) performance on the CPS metric among compliant models, newly introduced in this [Matbench Discovery](https://matbench-discovery.materialsproject.org/). +> Model keywords: `7net-l3i5` | `SevenNet-l3i5` -#### **Training set MAE** -| Energy (meV/atom) | Force (eV/Å) | Stress (kbar) | -|:---:|:---:|:---:| -|8.3|0.029|2.33| +The model increases the maximum spherical harmonic degree ($l_{\mathrm{max}}$) to 3, compared to `SevenNet-0` with $l_{\mathrm{max}}$ of 2. While **l3i5** offers improved accuracy across various systems compared to `SevenNet-0`, it is approximately four times slower. As of March 17, 2025, this model has achieved state-of-the-art (SOTA) performance on the CPS metric among compliant models, newly introduced in this [Matbench Discovery](https://matbench-discovery.materialsproject.org/). #### **Matbench Discovery** | CPS | F1 | $\kappa_{\mathrm{SRME}}$ | RMSD | |:---:|:---:|:---:|:---:| |0.764 |0.76|0.55|0.0182| - -#### **Training time** -381 GPU-days on A100 - --- ### **SevenNet-0 (11Jul2024)** -> Keywords in ASE: `7net-0`, `SevenNet-0`, `7net-0_11Jul2024`, and `SevenNet-0_11Jul2024` +> Model keywords:: `7net-0` | `SevenNet-0` | `7net-0_11Jul2024` | `SevenNet-0_11Jul2024` The model architecture is mainly line with [GNoME](https://github.com/google-deepmind/materials_discovery), a pretrained model that utilizes the NequIP architecture. Five interaction blocks with node features that consist of 128 scalars (*l*=0), 64 vectors (*l*=1), and 32 tensors (*l*=2). @@ -89,19 +85,11 @@ The model was trained with [MPtrj](https://figshare.com/articles/dataset/Materia This model is loaded as the default pre-trained model in ASE calculator. For more information, click [here](sevenn/pretrained_potentials/SevenNet_0__11Jul2024). 
-#### **Training set MAE** -| Energy (meV/atom) | Force (eV/Å) | Stress (kbar) | -|:---:|:---:|:---:| -|11.5|0.041|2.78| - #### **Matbench Discovery** | F1 | $\kappa_{\mathrm{SRME}}$ | |:---:|:---:| |0.67|0.767| -#### **Training time** -90 GPU-days on A100 - --- In addition to these latest models, you can find our legacy models from [pretrained_potentials](./sevenn/pretrained_potentials). From abb088b47dec2a56fb224ee12a9b0d9dbcc13c74 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Mar 2025 05:10:05 +0000 Subject: [PATCH 13/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 52ed5261..b216fb40 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Additionally, `keywords` can be called in other parts of SevenNet, such as `seve **This is our recommended pre-trained model** -This model leverages [multi-fidelity learning](https://pubs.acs.org/doi/10.1021/jacs.4c14455) to simultaneously train on the [MPtrj](https://figshare.com/articles/dataset/Materials_Project_Trjectory_MPtrj_Dataset/23713842), [sAlex](https://huggingface.co/datasets/fairchem/OMAT24), and [OMat24](https://huggingface.co/datasets/fairchem/OMAT24) datasets. As of March 17, 2025, it has achieved state-of-the-art performance on the [Matbench Discovery](https://matbench-discovery.materialsproject.org/) in the CPS (Combined Performance Score). We have found that this model outperforms most tasks, except for isolated molecule energy, where it performs slightly worse than SevenNet-l3i5. +This model leverages [multi-fidelity learning](https://pubs.acs.org/doi/10.1021/jacs.4c14455) to simultaneously train on the [MPtrj](https://figshare.com/articles/dataset/Materials_Project_Trjectory_MPtrj_Dataset/23713842), [sAlex](https://huggingface.co/datasets/fairchem/OMAT24), and [OMat24](https://huggingface.co/datasets/fairchem/OMAT24) datasets. As of March 17, 2025, it has achieved state-of-the-art performance on the [Matbench Discovery](https://matbench-discovery.materialsproject.org/) in the CPS (Combined Performance Score). We have found that this model outperforms most tasks, except for isolated molecule energy, where it performs slightly worse than SevenNet-l3i5. ```python from sevenn.calculator import SevenNetCalculator