janosh
diff --git a/‎data/training-sets.yml
+13-1 b/‎data/training-sets.yml
+13-1
diff --git a/‎matbench_discovery/enums.py
+4-2 b/‎matbench_discovery/enums.py
+4-2
diff --git a/‎models/deepmd/dpa3-v1-mptrj.yml
+1 b/‎models/deepmd/dpa3-v1-mptrj.yml
+1
diff --git a/‎models/deepmd/dpa3-v1-openlam.yml
+2-1 b/‎models/deepmd/dpa3-v1-openlam.yml
+2-1
diff --git a/‎models/deepmd/dpa3-v2-mptrj.yml
+181 b/‎models/deepmd/dpa3-v2-mptrj.yml
+181
@@ -54,7 +54,8 @@ Alex:
 OMat24:
   title: OMat24
   url: https://huggingface.co/datasets/fairchem/OMAT24#omat24-dataset
-  # this is the number of Alexandria materials that was sampled from, but according to Luis unclear if all were indeed sampled so this is an upper bound
+  # this is the number of Alexandria materials that were sampled from, but according to Luis it is
+  # unclear whether all were indeed picked, so this is an upper bound on the number of materials
   n_materials: 3_227_606
   n_structures: 100_824_585
   open: true
@@ -74,3 +75,14 @@ sAlex Validation:
   n_materials: 170_905 # approximate value! TODO confirm this
   n_structures: 553_218
   open: true
+
+OpenLAM:
+  title: OpenLAM dataset v1
+  url: https://aissquare.com/datasets/detail?pageType=datasets&name=LAMBench-TrainingSet-v1&id=308
+  download_url: https://aissquare.com/datasets/detail?pageType=datasets&name=LAMBench-TrainingSet-v1&id=308 # will be combined for downloading soon
+  n_structures: 162_507_178
+  open: true
+  description: |
+    This dataset integrates multidisciplinary DFT data sourced from the Deep Modeling community (https://deepmodeling.com)
+    and other open repositories to pre-train large atomic models (LAMs),
+    while intentionally excluding overlap with WBM benchmark systems (e.g., Alex3D structures).
@@ -302,8 +302,10 @@ class Model(Files, base_dir=f"{ROOT}/models"):
     cgcnn_p = auto(), "cgcnn/cgcnn+p.yml"
 
     # DeepMD-DPA3 models
-    dpa3_v1_mptrj = auto(), "deepmd/dpa3-v1-mptrj.yml"
-    dpa3_v1_openlam = auto(), "deepmd/dpa3-v1-openlam.yml"
+    dpa3_v2_mptrj = auto(), "deepmd/dpa3-v2-mptrj.yml"
+    dpa3_v2_openlam = auto(), "deepmd/dpa3-v2-openlam.yml"
+    # dpa3_v1_mptrj = auto(), "deepmd/dpa3-v1-mptrj.yml"
+    # dpa3_v1_openlam = auto(), "deepmd/dpa3-v1-openlam.yml"
 
     # FAIR-Chem
     eqv2_s_dens = auto(), "eqV2/eqV2-s-dens-mp.yml"
 
@@ -40,6 +40,7 @@ targets: EFS_G
 model_type: UIP
 model_params: 3_374_647
 n_estimators: 1
+status: superseded
 
 hyperparams:
   max_force: 0.05
 
@@ -40,6 +40,7 @@ targets: EFS_G
 model_type: UIP
 model_params: 8_184_608
 n_estimators: 1
+status: superseded
 
 hyperparams:
   max_force: 0.05
@@ -93,7 +94,7 @@ requirements:
   pymatgen: 2024.6.10
   numpy: 1.26.4
 
-training_set: [OMat24, MPtrj, sAlex] # need to update to OpenLAM
+training_set: [OpenLAM]
 
 notes:
   Description: |
 
@@ -0,0 +1,181 @@
+model_name: DPA3-v2-MPtrj
+model_key: dpa3-v2-mptrj
+model_version: v0.2 # 2025-03-14
+matbench_discovery_version: 1.3.1
+date_added: "2025-03-14"
+date_published: "2025-03-14"
+authors:
+  - name: Duo Zhang
+    affiliation: AI for Science Institute, Beijing
+    orcid: https://orcid.org/0000-0001-9591-2659
+  - name: Anyang Peng
+    affiliation: AI for Science Institute, Beijing
+    orcid: https://orcid.org/0000-0002-0630-2187
+  - name: Chun Cai
+    affiliation: AI for Science Institute, Beijing
+    orcid: https://orcid.org/0000-0001-6242-0439
+  - name: Linfeng Zhang
+    affiliation: AI for Science Institute, Beijing; DP Technology
+    email: [email protected]
+    corresponding: true
+  - name: Han Wang
+    affiliation: Laboratory of Computational Physics, Institute of Applied Physics and Computational Mathematics
+    email: [email protected]
+    corresponding: true
+trained_by:
+  - name: Duo Zhang
+    affiliation: AI for Science Institute, Beijing
+    orcid: https://orcid.org/0000-0001-9591-2659
+repo: https://github.com/deepmodeling/deepmd-kit/tree/devel
+url: https://github.com/deepmodeling/deepmd-kit/tree/devel
+doi: https://github.com/deepmodeling/deepmd-kit/tree/devel # placeholder (repo URL) until the DOI is released
+paper: https://github.com/deepmodeling/deepmd-kit/tree/devel # placeholder (repo URL) until the paper is released
+pr_url: https://github.com/janosh/matbench-discovery/pull/222
+trained_for_benchmark: true
+
+openness: OSOD
+train_task: S2EFS
+test_task: IS2RE-SR
+targets: EFS_G
+model_type: UIP
+model_params: 4_923_959
+n_estimators: 1
+
+hyperparams:
+  max_force: 0.05
+  max_steps: 500
+  ase_optimizer: FIRE
+  cell_filter: ExpCellFilter
+  n_layers: 24
+  e_rcut: 6.0
+  a_rcut: 4.0
+  n_dim: 128
+  e_dim: 64
+  a_dim: 32
+  optimizer: Adam
+  round1:
+    loss: MSE
+    loss_weights:
+      energy: 0.2 -> 20
+      force: 100 -> 20
+      virial: 0.02 -> 1
+    initial_learning_rate: 0.001
+    learning_rate_schedule: ExpLR - start_lr=0.001, decay_steps=5000, stop_lr=0.00001
+    training_steps: 2000000
+  round2:
+    loss: Huber
+    loss_weights:
+      energy: 15
+      force: 1
+      virial: 2.5
+    initial_learning_rate: 0.0002
+    learning_rate_schedule: ExpLR - start_lr=0.0002, decay_steps=5000, stop_lr=0.00001
+    training_steps: 1000000
+  batch_size: 64 # 16 (gpus) * 4 (batch per gpu) = 64 (total batch size)
+  epochs: 120 # round1 80 + round2 40
+
+requirements:
+  torch: 2.3.1
+  torch-geometric: 2.5.2
+  ase: 3.23.0
+  pymatgen: 2024.6.10
+  numpy: 1.26.4
+
+training_set: [MPtrj]
+
+notes:
+  Description: |
+    DPA3 is an advanced interatomic potential leveraging the message passing architecture, implemented within the DeePMD-kit framework, available on GitHub (https://github.com/deepmodeling/deepmd-kit/tree/devel).
+    Designed as a large atomic model (LAM), DPA3 is tailored to integrate and simultaneously train on datasets from various disciplines, encompassing diverse chemical and materials systems across different research domains.
+    Its model design ensures exceptional fitting accuracy and robust generalization both within and beyond the training domain.
+    Furthermore, DPA3 maintains energy conservation and respects the physical symmetries of the potential energy surface, making it a dependable tool for a wide range of scientific applications.
+
+metrics:
+  phonons:
+    kappa_103:
+      κ_SRME: 0.959
+      pred_file: models/deepmd/dpa3-v2-mptrj/2025-03-14-kappa-103-FIRE-dist=0.01-fmax=1e-4-symprec=1e-5.json.gz
+      pred_file_url: https://figshare.com/files/52988744
+  geo_opt:
+    pred_file: models/deepmd/dpa3-v2-mptrj/2025-03-14-wbm-geo-opt.json.gz
+    struct_col: dp_structure
+    pred_file_url: https://figshare.com/files/53018849
+    symprec=1e-2:
+      rmsd: 0.0164 # Å
+      n_sym_ops_mae: 1.968 # unitless
+      symmetry_decrease: 0.0601 # fraction
+      symmetry_match: 0.8052 # fraction
+      symmetry_increase: 0.1273 # fraction
+      n_structures: 256963 # count
+      analysis_file: models/deepmd/dpa3-v2-mptrj/2025-03-14-wbm-geo-opt-symprec=1e-2-moyo=0.4.2.csv.gz
+      analysis_file_url: https://figshare.com/files/53019278
+    symprec=1e-5:
+      rmsd: 0.0164 # Å
+      n_sym_ops_mae: 2.1461 # unitless
+      symmetry_decrease: 0.0766 # fraction
+      symmetry_match: 0.7154 # fraction
+      symmetry_increase: 0.2014 # fraction
+      n_structures: 256963 # count
+      analysis_file: models/deepmd/dpa3-v2-mptrj/2025-03-14-wbm-geo-opt-symprec=1e-5-moyo=0.4.2.csv.gz
+      analysis_file_url: https://figshare.com/files/53019281
+  discovery:
+    pred_file: models/deepmd/dpa3-v2-mptrj/2025-03-14-wbm-IS2RE.csv.gz
+    pred_file_url: https://figshare.com/files/53018801
+    pred_col: e_form_per_atom_dp
+    full_test_set:
+      F1: 0.774 # fraction
+      DAF: 4.25 # dimensionless
+      Precision: 0.729 # fraction
+      Recall: 0.825 # fraction
+      Accuracy: 0.917 # fraction
+      TPR: 0.825 # fraction
+      FPR: 0.064 # fraction
+      TNR: 0.936 # fraction
+      FNR: 0.175 # fraction
+      TP: 36393.0 # count
+      FP: 13519.0 # count
+      TN: 199352.0 # count
+      FN: 7699.0 # count
+      MAE: 0.038 # eV/atom
+      RMSE: 0.082 # eV/atom
+      R2: 0.796 # dimensionless
+      missing_preds: 0 # count
+      missing_percent: 0.00% # percent
+    most_stable_10k:
+      F1: 0.980 # fraction
+      DAF: 6.280 # dimensionless
+      Precision: 0.960 # fraction
+      Recall: 1.0 # fraction
+      Accuracy: 0.960 # fraction
+      TPR: 1.0 # fraction
+      FPR: 1.0 # fraction
+      TNR: 0.0 # fraction
+      FNR: 0.0 # fraction
+      TP: 9600.0 # count
+      FP: 400.0 # count
+      TN: 0.0 # count
+      FN: 0.0 # count
+      MAE: 0.032 # eV/atom
+      RMSE: 0.078 # eV/atom
+      R2: 0.866 # dimensionless
+      missing_preds: 0 # count
+      missing_percent: 0.00% # percent
+    unique_prototypes:
+      F1: 0.786 # fraction
+      DAF: 4.760 # dimensionless
+      Precision: 0.737 # fraction
+      Recall: 0.841 # fraction
+      Accuracy: 0.929 # fraction
+      TPR: 0.841 # fraction
+      FPR: 0.055 # fraction
+      TNR: 0.945 # fraction
+      FNR: 0.159 # fraction
+      TP: 28073.0 # count
+      FP: 10008.0 # count
+      TN: 172106.0 # count
+      FN: 5301.0 # count
+      MAE: 0.039 # eV/atom
+      RMSE: 0.081 # eV/atom
+      R2: 0.804 # dimensionless
+      missing_preds: 0 # count
+      missing_percent: 0.00% # percent