From bb8a86af9953abe7c171e95a5c1258abbf5777cd Mon Sep 17 00:00:00 2001 From: Luka Lafaye de Micheaux Date: Thu, 16 Jan 2025 09:46:31 +0100 Subject: [PATCH 01/13] Update codecarbonVersFormulaire.md --- .../bash/codecarbonVersFormulaire.md | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md index 1400c1d..c2c2ede 100644 --- a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md +++ b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md @@ -91,3 +91,131 @@ Dans la suite les champs du datamodel son exprimé en fonction des éléments et codecarbon ==> \[measures](measures.*)measurementMethod = M11 **kWh** ==> \[measures](measures.*)unit= M19 + + + +Example of a python script to extract these fields: + +```py +import torch +import torch.nn as nn +import torch.optim as optim +from codecarbon import EmissionsTracker +import platform +import os +import time +from datetime import datetime +import pkg_resources + +# Generate synthetic data +torch.manual_seed(42) +n_samples = 100 +X = torch.rand(n_samples, 1) * 10 +true_slope = 2.5 +true_intercept = 1.0 +noise = torch.randn(n_samples, 1) * 2 +y = true_slope * X + true_intercept + noise + +# Define linear regression model +class LinearRegressionModel(nn.Module): + def __init__(self): + super(LinearRegressionModel, self).__init__() + self.linear = nn.Linear(1, 1) + + def forward(self, x): + return self.linear(x) + +# Initialize model, loss, and optimizer +model = LinearRegressionModel() +criterion = nn.MSELoss() +optimizer = optim.SGD(model.parameters(), lr=0.01) + +# Initialize CodeCarbon tracker +tracker = EmissionsTracker(project_name="Linear Regression Training") +tracker.start() + +# Measure training start time +start_time = time.time() + +# Training loop +num_epochs = 500 +for epoch in range(num_epochs): + y_pred = model(X) + 
loss = criterion(y_pred, y) + optimizer.zero_grad() + loss.backward() + optimizer.step() + +# Measure training end time +end_time = time.time() +training_duration = end_time - start_time + +# Stop tracker +emissions = tracker.stop() + +# Field extraction +def get_field_or_none(obj, attr, default=None): + return getattr(obj, attr, default) + +try: + codecarbon_version = pkg_resources.get_distribution("codecarbon").version +except Exception: + codecarbon_version = None + +fields = { + "run_id": get_field_or_none(tracker, "_experiment_id"), + "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "project_name": "Linear Regression Training", + "duration": training_duration, + "emissions": emissions, + "emissions_rate": None, # Emissions rate is not directly available + "cpu_power": get_field_or_none(tracker, "_cpu_power"), + "gpu_power": get_field_or_none(tracker, "_gpu_power"), + "ram_power": get_field_or_none(tracker, "_ram_power"), + "cpu_energy": get_field_or_none(tracker, "_cpu_energy"), + "gpu_energy": get_field_or_none(tracker, "_gpu_energy"), + "ram_energy": get_field_or_none(tracker, "_ram_energy"), + "energy_consumed": float(get_field_or_none(tracker, "_total_energy", 0)), + "country_name": None, # Country name not directly available + "country_iso_code": None, # Country ISO code not directly available + "region": None, # Region not directly available + "cloud_provider": os.environ.get("CLOUD_PROVIDER", "None"), + "cloud_region": os.environ.get("CLOUD_REGION", "None"), + "os": platform.system(), + "python_version": platform.python_version(), + "codecarbon_version": codecarbon_version, + "cpu_count": os.cpu_count(), + "cpu_model": platform.processor(), + "gpu_count": 0, # CodeCarbon doesn't provide GPU count in this setup + "gpu_model": None, # GPU model not provided in CPU-only runs + "longitude": None, # Longitude not directly available + "latitude": None, # Latitude not directly available + "ram_total_size": round(os.sysconf("SC_PAGE_SIZE") * 
os.sysconf("SC_PHYS_PAGES") / (1024**3), 2), + "tracking_mode": get_field_or_none(tracker, "_tracking_mode"), + "on_cloud": "Yes" if os.environ.get("CLOUD_PROVIDER") else "No", + "pue": get_field_or_none(tracker, "_pue"), + "extra": get_field_or_none(tracker, "_measure_power_method"), + "kWh": "kWh" # Assumed as the default unit for power consumption +} + +# Print extracted fields +for key, value in fields.items(): + print(f"{key}: {value}") +``` + +Tested on CPU only: +Some fields were not found including + +- emissions_rate +- cpu_energy +- ram_energy +- country_name +- country_iso_code +- region +- cloud_provider +- cloud_region +- cpu_model +- longitude +- latitude +- on_cloud (No because uses global var but can hardly be extracted) +- extra From 335f24fde3aa9e145a20a844e0753f3ee1e1b4ca Mon Sep 17 00:00:00 2001 From: Luka Lafaye de Micheaux Date: Thu, 16 Jan 2025 10:09:28 +0100 Subject: [PATCH 02/13] Update codecarbonVersFormulaire.md --- .../small-automation/bash/codecarbonVersFormulaire.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md index c2c2ede..be16fb2 100644 --- a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md +++ b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md @@ -213,7 +213,7 @@ Some fields were not found including - country_iso_code - region - cloud_provider -- cloud_region +- cloud_region How is this different from region? Since we already have the on_cloud field? 
- cpu_model - longitude - latitude From b00b46503f9b75b0e655a33217675cadcc9aa605 Mon Sep 17 00:00:00 2001 From: Luka Lafaye de Micheaux Date: Thu, 16 Jan 2025 10:19:40 +0100 Subject: [PATCH 03/13] Update codecarbonVersFormulaire.md --- .../bash/codecarbonVersFormulaire.md | 255 +++++++++++------- 1 file changed, 152 insertions(+), 103 deletions(-) diff --git a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md index be16fb2..b263f71 100644 --- a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md +++ b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md @@ -97,125 +97,174 @@ codecarbon ==> \[measures](measures.*)measurementMethod = M11 Example of a python script to extract these fields: ```py -import torch -import torch.nn as nn -import torch.optim as optim -from codecarbon import EmissionsTracker import platform import os -import time +import requests +from codecarbon import EmissionsTracker from datetime import datetime import pkg_resources -# Generate synthetic data -torch.manual_seed(42) -n_samples = 100 -X = torch.rand(n_samples, 1) * 10 -true_slope = 2.5 -true_intercept = 1.0 -noise = torch.randn(n_samples, 1) * 2 -y = true_slope * X + true_intercept + noise - -# Define linear regression model -class LinearRegressionModel(nn.Module): - def __init__(self): - super(LinearRegressionModel, self).__init__() - self.linear = nn.Linear(1, 1) - - def forward(self, x): - return self.linear(x) - -# Initialize model, loss, and optimizer -model = LinearRegressionModel() -criterion = nn.MSELoss() -optimizer = optim.SGD(model.parameters(), lr=0.01) - -# Initialize CodeCarbon tracker +def get_cpu_model(): + """ + Fetch the CPU model using platform or /proc/cpuinfo (Linux-specific). 
+ """ + try: + # Try platform.processor (may return empty on some systems) + cpu_model = platform.processor() + if cpu_model: + return cpu_model + + # Fallback to reading /proc/cpuinfo + if os.path.exists("/proc/cpuinfo"): + with open("/proc/cpuinfo", "r") as f: + for line in f: + if "model name" in line: + return line.split(":")[1].strip() + except Exception as e: + print(f"Error fetching CPU model: {e}") + return None + +def extract_fields(tracker, emissions, duration): + # Use a utility function to handle missing attributes + def get_field_or_none(obj, attr, default=None): + return getattr(obj, attr, default) + + # Get location information via external API + def get_location_info(): + try: + response = requests.get("http://ip-api.com/json/") + if response.status_code == 200: + data = response.json() + return { + "country_name": data.get("country"), + "country_iso_code": data.get("countryCode"), + "region": data.get("regionName"), + "longitude": data.get("lon"), + "latitude": data.get("lat"), + } + except Exception: + pass + return {"country_name": None, "country_iso_code": None, "region": None, "longitude": None, "latitude": None} + + # Fetch CodeCarbon version + try: + codecarbon_version = pkg_resources.get_distribution("codecarbon").version + except Exception: + codecarbon_version = None + + # Location information + location_info = get_location_info() + + # Extract power values + cpu_power = get_field_or_none(tracker, "_cpu_power", 0) # kW + gpu_power = get_field_or_none(tracker, "_gpu_power", 0) # kW + ram_power = get_field_or_none(tracker, "_ram_power", 0) # kW + + # Calculate energy consumption (kWh) + duration_hours = duration / 3600 # Convert seconds to hours + cpu_energy = cpu_power * duration_hours if cpu_power else None + gpu_energy = gpu_power * duration_hours if gpu_power else None + ram_energy = ram_power * duration_hours if ram_power else None + + # Extract fields + fields = { + "run_id": get_field_or_none(tracker, "_experiment_id"), + "timestamp": 
datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "project_name": "Linear Regression Training", + "duration": duration, + "emissions": emissions, + "emissions_rate": emissions / duration if duration else None, # Calculate manually + "cpu_power": cpu_power, + "gpu_power": gpu_power, + "ram_power": ram_power, + "cpu_energy": cpu_energy, + "gpu_energy": gpu_energy, + "ram_energy": ram_energy, + "energy_consumed": float(get_field_or_none(tracker, "_total_energy", 0)), + "country_name": location_info["country_name"], + "country_iso_code": location_info["country_iso_code"], + "region": location_info["region"], + "cloud_provider": os.environ.get("CLOUD_PROVIDER", "None"), + "cloud_region": os.environ.get("CLOUD_REGION", "None"), + "os": platform.system(), + "python_version": platform.python_version(), + "codecarbon_version": codecarbon_version, + "cpu_count": os.cpu_count(), + "cpu_model": get_cpu_model(), # Updated CPU model extraction + "gpu_count": 0, # No GPU detected + "gpu_model": None, + "longitude": location_info["longitude"], + "latitude": location_info["latitude"], + "ram_total_size": round(os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") / (1024**3), 2), + "tracking_mode": get_field_or_none(tracker, "_tracking_mode"), + "on_cloud": "Yes" if os.environ.get("CLOUD_PROVIDER") else "No", + "pue": get_field_or_none(tracker, "_pue", 1.0), # Default PUE is 1.0 + "extra": get_field_or_none(tracker, "_measure_power_method"), + "kWh": "kWh", + } + return fields + +# Example usage tracker = EmissionsTracker(project_name="Linear Regression Training") tracker.start() -# Measure training start time -start_time = time.time() - -# Training loop -num_epochs = 500 -for epoch in range(num_epochs): - y_pred = model(X) - loss = criterion(y_pred, y) - optimizer.zero_grad() - loss.backward() - optimizer.step() - -# Measure training end time -end_time = time.time() -training_duration = end_time - start_time - -# Stop tracker +# Simulate task duration and emissions +import time 
+time.sleep(1) # Simulate task duration emissions = tracker.stop() +duration = 1 # Replace with actual duration in seconds -# Field extraction -def get_field_or_none(obj, attr, default=None): - return getattr(obj, attr, default) - -try: - codecarbon_version = pkg_resources.get_distribution("codecarbon").version -except Exception: - codecarbon_version = None - -fields = { - "run_id": get_field_or_none(tracker, "_experiment_id"), - "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "project_name": "Linear Regression Training", - "duration": training_duration, - "emissions": emissions, - "emissions_rate": None, # Emissions rate is not directly available - "cpu_power": get_field_or_none(tracker, "_cpu_power"), - "gpu_power": get_field_or_none(tracker, "_gpu_power"), - "ram_power": get_field_or_none(tracker, "_ram_power"), - "cpu_energy": get_field_or_none(tracker, "_cpu_energy"), - "gpu_energy": get_field_or_none(tracker, "_gpu_energy"), - "ram_energy": get_field_or_none(tracker, "_ram_energy"), - "energy_consumed": float(get_field_or_none(tracker, "_total_energy", 0)), - "country_name": None, # Country name not directly available - "country_iso_code": None, # Country ISO code not directly available - "region": None, # Region not directly available - "cloud_provider": os.environ.get("CLOUD_PROVIDER", "None"), - "cloud_region": os.environ.get("CLOUD_REGION", "None"), - "os": platform.system(), - "python_version": platform.python_version(), - "codecarbon_version": codecarbon_version, - "cpu_count": os.cpu_count(), - "cpu_model": platform.processor(), - "gpu_count": 0, # CodeCarbon doesn't provide GPU count in this setup - "gpu_model": None, # GPU model not provided in CPU-only runs - "longitude": None, # Longitude not directly available - "latitude": None, # Latitude not directly available - "ram_total_size": round(os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") / (1024**3), 2), - "tracking_mode": get_field_or_none(tracker, "_tracking_mode"), - 
"on_cloud": "Yes" if os.environ.get("CLOUD_PROVIDER") else "No", - "pue": get_field_or_none(tracker, "_pue"), - "extra": get_field_or_none(tracker, "_measure_power_method"), - "kWh": "kWh" # Assumed as the default unit for power consumption -} +fields = extract_fields(tracker, emissions, duration) # Print extracted fields for key, value in fields.items(): print(f"{key}: {value}") ``` +Output: + +``` +run_id: 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 +timestamp: 2025-01-16 10:15:55 +project_name: Linear Regression Training +duration: 1 +emissions: 7.412917773134365e-07 +emissions_rate: 7.412917773134365e-07 +cpu_power: Power(kW=0.0425) +gpu_power: Power(kW=0.0) +ram_power: Power(kW=0.0050578808784484865) +cpu_energy: Power(kW=1.1805555555555557e-05) +gpu_energy: Power(kW=0.0) +ram_energy: Power(kW=1.4049669106801352e-06) +energy_consumed: 1.3228140711173227e-05 +country_name: France +country_iso_code: FR +region: Île-de-France +cloud_provider: None +cloud_region: None +os: Linux +python_version: 3.13.1 +codecarbon_version: 2.8.2 +cpu_count: 12 +cpu_model: AMD Ryzen 5 7530U with Radeon Graphics +gpu_count: 0 +gpu_model: None +longitude: 2.2463 +latitude: 48.7144 +ram_total_size: 13.49 +tracking_mode: machine +on_cloud: No +pue: 1.0 +extra: None +kWh: kWh +``` + Tested on CPU only: Some fields were not found including -- emissions_rate -- cpu_energy -- ram_energy -- country_name -- country_iso_code -- region -- cloud_provider -- cloud_region How is this different from region? Since we already have the on_cloud field? -- cpu_model -- longitude -- latitude -- on_cloud (No because uses global var but can hardly be extracted) -- extra +- cloud_provider -> fetched using CLOUD_PROVIDER OK? +- cloud_region How is this different from region? Since we already have the on_cloud field? currently fetched using CLOUD_REGION if different? +- cpu_model -> fetched on my Linux system but needs to be updated for other platforms in python code +- on_cloud -> yes if CLOUD_PROVIDER found, OK? 
+- extra -> what is this? +- tracking mode is a field in code carbon _tracking_mode, should we use this or M14/M15? From 2d40e868a01f847293b6550b228655c8da6ac8a2 Mon Sep 17 00:00:00 2001 From: Luka Lafaye de Micheaux Date: Thu, 16 Jan 2025 10:26:36 +0100 Subject: [PATCH 04/13] Update codecarbonVersFormulaire.md --- .../bash/codecarbonVersFormulaire.md | 105 +++++++++++------- 1 file changed, 66 insertions(+), 39 deletions(-) diff --git a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md index b263f71..2b13e6b 100644 --- a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md +++ b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md @@ -94,27 +94,26 @@ codecarbon ==> \[measures](measures.*)measurementMethod = M11 -Example of a python script to extract these fields: +Example of a python script to extract these fields while training a regression model: ```py +import torch +import torch.nn as nn +import torch.optim as optim +from codecarbon import EmissionsTracker import platform import os import requests -from codecarbon import EmissionsTracker +import time from datetime import datetime import pkg_resources +# Fetch CPU model def get_cpu_model(): - """ - Fetch the CPU model using platform or /proc/cpuinfo (Linux-specific). 
- """ try: - # Try platform.processor (may return empty on some systems) cpu_model = platform.processor() if cpu_model: return cpu_model - - # Fallback to reading /proc/cpuinfo if os.path.exists("/proc/cpuinfo"): with open("/proc/cpuinfo", "r") as f: for line in f: @@ -124,12 +123,11 @@ def get_cpu_model(): print(f"Error fetching CPU model: {e}") return None +# Extract tracking fields def extract_fields(tracker, emissions, duration): - # Use a utility function to handle missing attributes def get_field_or_none(obj, attr, default=None): return getattr(obj, attr, default) - # Get location information via external API def get_location_info(): try: response = requests.get("http://ip-api.com/json/") @@ -146,34 +144,28 @@ def extract_fields(tracker, emissions, duration): pass return {"country_name": None, "country_iso_code": None, "region": None, "longitude": None, "latitude": None} - # Fetch CodeCarbon version try: codecarbon_version = pkg_resources.get_distribution("codecarbon").version except Exception: codecarbon_version = None - # Location information location_info = get_location_info() + cpu_power = get_field_or_none(tracker, "_cpu_power", 0) + gpu_power = get_field_or_none(tracker, "_gpu_power", 0) + ram_power = get_field_or_none(tracker, "_ram_power", 0) + duration_hours = duration / 3600 - # Extract power values - cpu_power = get_field_or_none(tracker, "_cpu_power", 0) # kW - gpu_power = get_field_or_none(tracker, "_gpu_power", 0) # kW - ram_power = get_field_or_none(tracker, "_ram_power", 0) # kW - - # Calculate energy consumption (kWh) - duration_hours = duration / 3600 # Convert seconds to hours cpu_energy = cpu_power * duration_hours if cpu_power else None gpu_energy = gpu_power * duration_hours if gpu_power else None ram_energy = ram_power * duration_hours if ram_power else None - # Extract fields fields = { "run_id": get_field_or_none(tracker, "_experiment_id"), "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "project_name": "Linear Regression 
Training", "duration": duration, "emissions": emissions, - "emissions_rate": emissions / duration if duration else None, # Calculate manually + "emissions_rate": emissions / duration if duration else None, "cpu_power": cpu_power, "gpu_power": gpu_power, "ram_power": ram_power, @@ -190,33 +182,68 @@ def extract_fields(tracker, emissions, duration): "python_version": platform.python_version(), "codecarbon_version": codecarbon_version, "cpu_count": os.cpu_count(), - "cpu_model": get_cpu_model(), # Updated CPU model extraction - "gpu_count": 0, # No GPU detected + "cpu_model": get_cpu_model(), + "gpu_count": 0, "gpu_model": None, "longitude": location_info["longitude"], "latitude": location_info["latitude"], "ram_total_size": round(os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") / (1024**3), 2), "tracking_mode": get_field_or_none(tracker, "_tracking_mode"), "on_cloud": "Yes" if os.environ.get("CLOUD_PROVIDER") else "No", - "pue": get_field_or_none(tracker, "_pue", 1.0), # Default PUE is 1.0 + "pue": get_field_or_none(tracker, "_pue", 1.0), "extra": get_field_or_none(tracker, "_measure_power_method"), "kWh": "kWh", } return fields -# Example usage +# Generate synthetic data +torch.manual_seed(42) +n_samples = 100 +X = torch.rand(n_samples, 1) * 10 +true_slope = 2.5 +true_intercept = 1.0 +noise = torch.randn(n_samples, 1) * 2 +y = true_slope * X + true_intercept + noise + +# Define the linear regression model +class LinearRegressionModel(nn.Module): + def __init__(self): + super(LinearRegressionModel, self).__init__() + self.linear = nn.Linear(1, 1) + + def forward(self, x): + return self.linear(x) + +# Initialize the model, loss function, and optimizer +model = LinearRegressionModel() +criterion = nn.MSELoss() +optimizer = optim.SGD(model.parameters(), lr=0.01) + +# Initialize the CodeCarbon tracker tracker = EmissionsTracker(project_name="Linear Regression Training") tracker.start() -# Simulate task duration and emissions -import time -time.sleep(1) # Simulate 
task duration +# Measure training start time +start_time = time.time() + +# Training loop +num_epochs = 500 +for epoch in range(num_epochs): + y_pred = model(X) + loss = criterion(y_pred, y) + optimizer.zero_grad() + loss.backward() + optimizer.step() + if (epoch + 1) % 50 == 0: + print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}") + +# Measure training end time and stop tracker +end_time = time.time() +training_duration = end_time - start_time emissions = tracker.stop() -duration = 1 # Replace with actual duration in seconds - -fields = extract_fields(tracker, emissions, duration) -# Print extracted fields +# Extract and print tracking fields +fields = extract_fields(tracker, emissions, training_duration) for key, value in fields.items(): print(f"{key}: {value}") ``` @@ -225,18 +252,18 @@ Output: ``` run_id: 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 -timestamp: 2025-01-16 10:15:55 +timestamp: 2025-01-16 10:25:02 project_name: Linear Regression Training -duration: 1 -emissions: 7.412917773134365e-07 -emissions_rate: 7.412917773134365e-07 +duration: 0.1512455940246582 +emissions: 1.1263528505851737e-07 +emissions_rate: 7.447177934991875e-07 cpu_power: Power(kW=0.0425) gpu_power: Power(kW=0.0) ram_power: Power(kW=0.0050578808784484865) -cpu_energy: Power(kW=1.1805555555555557e-05) +cpu_energy: Power(kW=1.785538262791104e-06) gpu_energy: Power(kW=0.0) -ram_energy: Power(kW=1.4049669106801352e-06) -energy_consumed: 1.3228140711173227e-05 +ram_energy: Power(kW=2.1249505499080597e-07) +energy_consumed: 2.0099445932032577e-06 country_name: France country_iso_code: FR region: Île-de-France From edfbad9367665bdd438a5b947104d1124d1d969b Mon Sep 17 00:00:00 2001 From: Luka Lafaye de Micheaux Date: Thu, 16 Jan 2025 10:32:43 +0100 Subject: [PATCH 05/13] Update README.md --- .../small-automation/bash/README.md | 49 +++++++++++++++++-- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/tools/json_generator/small-automation/bash/README.md 
b/tools/json_generator/small-automation/bash/README.md index 375ff06..1ce7711 100644 --- a/tools/json_generator/small-automation/bash/README.md +++ b/tools/json_generator/small-automation/bash/README.md @@ -1,15 +1,56 @@ How to use these scripts to generate the json report following the ai power measurement sharing ? -1. Edit the form_gen.conf to specify how many objects you want to generate in the form +1. Edit the form_Gen.conf to specify how many objects you want to generate in the form + 2. Use the gen_form.sh script - ./gen_form.sh > + ./gen_form.sh > + generation type can be -a for all fields, -m for mandatory fields + is form_Gen.conf in this directory + can be a txt file + + Example: ./gen_form.sh -a form_Gen.conf > report1.txt + This script uses the references.param file which has been created manually from the json datamodel. Please don't modify this file. + It uses the parameters file to create the form with the desired number of objects instances. - The form is written in the filename.txt -3. Edit the filename.txt to complete the fields directly inside this file before using the second script. + The form is written in the . + +3. Edit the to complete the fields directly inside this file before using the second script. + 4. Transform this text file into our json model using the form2json.sh ./form2json.sh > You just created your first report ! troubleshooting : - if you have this error : "/bin/bash^M: bad interpreter: No such file or directory" -> use this command : sed -i -e 's/\r$//' + +Why is ├quantization mandatory? +├cpuTrackingMode what is this? +├gpuTrackingMode what is this? + +├averageUtilizationCpu how to get this, same for gpu? + + +Lots of optional fields make it annoying to create the report +├formatVersionSpecificationUri + + +What can be automated / improved? 
+ +├licensing → MULTI select common license or other +├formatVersion → always use same format for all documents 0.0.1 +├reportId → generate UUID automatically +├reportDatetime → generate date automatically +├reportStatus → MULTI draft, final, corrective, $other + + +├confidentialityLevel → MULTI public, internal, confidential, secret + +├publicKey generate automatically? + + +Can be automated in python: + ├os= + ├distribution= + ├distributionVersion= + From 9bdf1fc104b79bcb3bef078969899c19cdf1b0d7 Mon Sep 17 00:00:00 2001 From: Luka Lafaye de Micheaux Date: Thu, 23 Jan 2025 07:37:33 +0100 Subject: [PATCH 06/13] Update codecarbonVersFormulaire.md --- .../bash/codecarbonVersFormulaire.md | 215 ++++++++++-------- 1 file changed, 123 insertions(+), 92 deletions(-) diff --git a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md index 2b13e6b..91e83b0 100644 --- a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md +++ b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md @@ -2,98 +2,6 @@ ## Informations -Version : v0.1 - -Etat : Travail - -RAF : - -- Détailler les elemetns -- Affiner les réponses -- Choisir les numeros d'item judicieusement -- ... 
- -## Contenu - -Dans la suite les champs du datamodel son exprimé en fonction des éléments et valeurs du fichier form_example.txt - -**run_id** : [header]reportId= H4 - -**timestamp** : - -- \[header]├reportDatetime= H5 -- ET -- \[measures](measures.1)measurementDateTime= M114 - -**project_name** : - -**duration** : \[measures](measures.1)measurementDuration = M113 - -**emissions** : - -**emissions_rate** : - -**cpu_power** : - -**gpu_power** : - -**ram_power** : - -**cpu_energy** : - -**gpu_energy** : - -**ram_energy** : - -**energy_consumed** : \[measures](measures.1)powerConsumption = M112 - -**country_name** : \[environment]country = E1 - -**country_iso_code** : - -**region** : - -**cloud_provider** : \[infrastructure]cloudProvider = I2 - -**cloud_region** : - -**os** : \[system]os = S1 - -**python_version** : A METTRE DASN SOFTWARE ? - -**codecarbon_version** : \[measures](measures.1)version = M13 - -**cpu_count** : \[infrastructure](components.1)nbComponent = IC12 = "Intel(R) Xeon(R) Gold 6226R CPU @ 2.90GHz" ? - -**cpu_model** : \[infrastructure](components.1)componentName = IC11 - -**gpu_count** : \[infrastructure](components.2)nbComponent= IC12 - -**gpu_model** : \[infrastructure](components.2)componentName= IC11 = "2 x Tesla V100S-PCIE-32GB" - -**longitude** : \[environment]longitude = E3 - -**latitude** : \[environment]latitude = E2 - -**ram_total_size** : \[infrastructure](components.3)memorySize = IC13 - -**tracking_mode** : \[measures](measures.1) - -- cpuTrackingMode = M14 -- OU -- gpuTrackingMode= M15 ? - -**on_cloud** : - -**pue** : - -**Extra** : -codecarbon ==> \[measures](measures.*)measurementMethod = M11 - -**kWh** ==> \[measures](measures.*)unit= M19 - - - Example of a python script to extract these fields while training a regression model: ```py @@ -295,3 +203,126 @@ Some fields were not found including - on_cloud -> yes if CLOUD_PROVIDER found, OK? - extra -> what is this? 
- tracking mode is a field in code carbon _tracking_mode, should we use this or M14/M15? + +# CodeCarbon Tracking Fields Documentation + +This document provides a comprehensive explanation of how each key-value pair is extracted and what it represents in the context of the `CodeCarbon` emissions tracking script. + +--- + +## Table of Key-Value Pairs + +| **Key** | **What It Represents** | +|------------------------|---------------------------------------------------------------------------------------| +| `run_id` | A unique identifier for the current run, generated by the `CodeCarbon` tracker. | +| `timestamp` | The current date and time when the tracking data was extracted. | +| `project_name` | The name of the project being tracked (e.g., "Linear Regression Training"). | +| `duration` | The total time (in seconds) taken for the training process. | +| `emissions` | The total carbon emissions (in kg of CO₂) produced during the training. | +| `emissions_rate` | The rate of carbon emissions (in kg of CO₂ per second) during the training. | +| `cpu_power` | The power consumption (in kW) of the CPU during the training. | +| `gpu_power` | The power consumption (in kW) of the GPU during the training. | +| `ram_power` | The power consumption (in kW) of the RAM during the training. | +| `cpu_energy` | The total energy consumed (in kWh) by the CPU during the training. | +| `gpu_energy` | The total energy consumed (in kWh) by the GPU during the training. | +| `ram_energy` | The total energy consumed (in kWh) by the RAM during the training. | +| `energy_consumed` | The total energy consumed (in kWh) by the system during the training. | +| `country_name` | The name of the country where the training was executed. | +| `country_iso_code` | The ISO code of the country where the training was executed. | +| `region` | The region (e.g., state or province) where the training was executed. | +| `cloud_provider` | The cloud provider used for the training (if applicable). 
| +| `cloud_region` | The region of the cloud provider used for the training (if applicable). | +| `os` | The operating system on which the training was executed. | +| `python_version` | The version of Python used to run the script. | +| `codecarbon_version` | The version of the `CodeCarbon` library used for tracking. | +| `cpu_count` | The number of CPU cores available on the system. | +| `cpu_model` | The model name of the CPU used for the training. | +| `gpu_count` | The number of GPUs available on the system. | +| `gpu_model` | The model name of the GPU used for the training (if applicable). | +| `longitude` | The longitude of the location where the training was executed. | +| `latitude` | The latitude of the location where the training was executed. | +| `ram_total_size` | The total size of the RAM (in GB) available on the system. | +| `tracking_mode` | The mode used by `CodeCarbon` for tracking (e.g., "machine" for local tracking). | +| `on_cloud` | Indicates whether the training was executed on a cloud provider (Yes/No). | +| `pue` | The Power Usage Effectiveness (PUE) of the data center (if applicable). | +| `extra` | Additional information about the power measurement method used by `CodeCarbon`. | +| `kWh` | The unit of energy measurement (kilowatt-hours). | + +The table will be soon be updated with paths to variables in the report.txt generated using the bash script. +--- + +## How Each Key-Value Pair is Extracted + +1. **`run_id`**: Extracted from the `CodeCarbon` tracker object using `get_field_or_none(tracker, "_experiment_id")`. +2. **`timestamp`**: Generated using `datetime.now().strftime("%Y-%m-%d %H:%M:%S")`. +3. **`project_name`**: Hardcoded as "Linear Regression Training". +4. **`duration`**: Calculated as the difference between the training start and end times. +5. **`emissions`**: Retrieved directly from the `CodeCarbon` tracker after stopping it. +6. **`emissions_rate`**: Calculated as `emissions / duration`. +7. 
**`cpu_power`**: Extracted from the tracker using `get_field_or_none(tracker, "_cpu_power")`. +8. **`gpu_power`**: Extracted from the tracker using `get_field_or_none(tracker, "_gpu_power")`. +9. **`ram_power`**: Extracted from the tracker using `get_field_or_none(tracker, "_ram_power")`. +10. **`cpu_energy`**: Calculated as `cpu_power * (duration / 3600)`. +11. **`gpu_energy`**: Calculated as `gpu_power * (duration / 3600)`. +12. **`ram_energy`**: Calculated as `ram_power * (duration / 3600)`. +13. **`energy_consumed`**: Extracted from the tracker using `get_field_or_none(tracker, "_total_energy")`. +14. **`country_name`**: Retrieved from the `ip-api.com` JSON response. +15. **`country_iso_code`**: Retrieved from the `ip-api.com` JSON response. +16. **`region`**: Retrieved from the `ip-api.com` JSON response. +17. **`cloud_provider`**: Retrieved from the environment variable `CLOUD_PROVIDER`. +18. **`cloud_region`**: Retrieved from the environment variable `CLOUD_REGION`. +19. **`os`**: Retrieved using `platform.system()`. +20. **`python_version`**: Retrieved using `platform.python_version()`. +21. **`codecarbon_version`**: Retrieved using `pkg_resources.get_distribution("codecarbon").version`. +22. **`cpu_count`**: Retrieved using `os.cpu_count()`. +23. **`cpu_model`**: Retrieved using the `get_cpu_model()` function. +24. **`gpu_count`**: Hardcoded as `0` (no GPU used in this example). +25. **`gpu_model`**: Hardcoded as `None` (no GPU used in this example). +26. **`longitude`**: Retrieved from the `ip-api.com` JSON response. +27. **`latitude`**: Retrieved from the `ip-api.com` JSON response. +28. **`ram_total_size`**: Calculated using `os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") / (1024**3)`. +29. **`tracking_mode`**: Extracted from the tracker using `get_field_or_none(tracker, "_tracking_mode")`. +30. **`on_cloud`**: Determined by checking if the `CLOUD_PROVIDER` environment variable is set. +31. 
**`pue`**: Extracted from the tracker using `get_field_or_none(tracker, "_pue")`. +32. **`extra`**: Extracted from the tracker using `get_field_or_none(tracker, "_measure_power_method")`. +33. **`kWh`**: Hardcoded as "kWh" (unit of energy measurement). + +--- + +## Requirements + +To run this script, you need the following dependencies installed: + +--- + +### Python Packages + +Install the required Python packages using `pip`: + +```bash +pip install torch codecarbon psutil wmi +``` + +--- + +### Package Versions + +Here are the recommended versions of the packages: + +| **Package** | **Version** | **Description** | +|---------------|-------------|---------------------------------------------------------------------------------| +| `torch` | `>=2.0.0` | PyTorch library for deep learning. | +| `codecarbon` | `>=2.0.0` | Library for tracking carbon emissions. | +| `psutil` | `>=5.8.0` | Cross-platform library for retrieving system information (CPU, RAM, etc.). | +| `wmi` | `>=1.5.1` | Windows Management Instrumentation library (required only on Windows). | + +--- + +### Operating System Dependencies + +- **Linux**: No additional dependencies required. +- **Windows**: The `wmi` library requires the `pywin32` package, which is installed automatically with `wmi`. +- **macOS**: No additional dependencies required. 
+ + +--- From 0afb31910f26da6b8ff055bc4737561d6a650af3 Mon Sep 17 00:00:00 2001 From: Luka Lafaye de Micheaux Date: Thu, 23 Jan 2025 07:38:17 +0100 Subject: [PATCH 07/13] Update codecarbonVersFormulaire.md --- .../bash/codecarbonVersFormulaire.md | 45 +++++++++++++------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md index 91e83b0..411cc86 100644 --- a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md +++ b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md @@ -15,18 +15,27 @@ import requests import time from datetime import datetime import pkg_resources +import csv +import psutil # For cross-platform RAM and CPU info # Fetch CPU model def get_cpu_model(): + system = platform.system() try: - cpu_model = platform.processor() - if cpu_model: - return cpu_model - if os.path.exists("/proc/cpuinfo"): - with open("/proc/cpuinfo", "r") as f: - for line in f: - if "model name" in line: - return line.split(":")[1].strip() + if system == "Linux": + if os.path.exists("/proc/cpuinfo"): + with open("/proc/cpuinfo", "r") as f: + for line in f: + if "model name" in line: + return line.split(":")[1].strip() + elif system == "Windows": + import wmi # Windows Management Instrumentation + c = wmi.WMI() + for processor in c.Win32_Processor(): + return processor.Name + elif system == "Darwin": # macOS + import subprocess + return subprocess.check_output(["sysctl", "-n", "machdep.cpu.brand_string"]).decode().strip() except Exception as e: print(f"Error fetching CPU model: {e}") return None @@ -67,6 +76,9 @@ def extract_fields(tracker, emissions, duration): gpu_energy = gpu_power * duration_hours if gpu_power else None ram_energy = ram_power * duration_hours if ram_power else None + # Get RAM size in GB + ram_total_size = round(psutil.virtual_memory().total / (1024**3), 2) + fields = { 
"run_id": get_field_or_none(tracker, "_experiment_id"), "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), @@ -95,7 +107,7 @@ def extract_fields(tracker, emissions, duration): "gpu_model": None, "longitude": location_info["longitude"], "latitude": location_info["latitude"], - "ram_total_size": round(os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") / (1024**3), 2), + "ram_total_size": ram_total_size, "tracking_mode": get_field_or_none(tracker, "_tracking_mode"), "on_cloud": "Yes" if os.environ.get("CLOUD_PROVIDER") else "No", "pue": get_field_or_none(tracker, "_pue", 1.0), @@ -150,13 +162,20 @@ end_time = time.time() training_duration = end_time - start_time emissions = tracker.stop() -# Extract and print tracking fields +# Extract tracking fields fields = extract_fields(tracker, emissions, training_duration) -for key, value in fields.items(): - print(f"{key}: {value}") + +# Write tracking fields to a CSV file +csv_file = "tracking_info.csv" +with open(csv_file, mode="w", newline="") as file: + writer = csv.DictWriter(file, fieldnames=fields.keys()) + writer.writeheader() + writer.writerow(fields) + +print(f"Tracking information saved to {csv_file}") ``` -Output: +Outputs a csv file containing these fields: ``` run_id: 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 From 7fcefbf8fa1681e7f9a1f743e75f5934414ba384 Mon Sep 17 00:00:00 2001 From: Luka Lafaye de Micheaux Date: Thu, 23 Jan 2025 15:22:38 +0100 Subject: [PATCH 08/13] Update codecarbonVersFormulaire.md --- .../bash/codecarbonVersFormulaire.md | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md index 411cc86..8ba8f8a 100644 --- a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md +++ b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md @@ -1,5 +1,10 @@ # Conversion entre les champs CodeCarbon 
et les Champs du Datamodel +## Prerequisities + +If you are using a cloud server, add CLOUD_PROVIDER = "Provider" as a global environment variable on your system. +Tested on CPU only! What is extra? + ## Informations Example of a python script to extract these fields while training a regression model: @@ -195,7 +200,6 @@ country_name: France country_iso_code: FR region: Île-de-France cloud_provider: None -cloud_region: None os: Linux python_version: 3.13.1 codecarbon_version: 2.8.2 @@ -213,16 +217,6 @@ extra: None kWh: kWh ``` -Tested on CPU only: -Some fields were not found including - -- cloud_provider -> fetched using CLOUD_PROVIDER OK? -- cloud_region How is this different from region? Since we already have the on_cloud field? currently fetched using CLOUD_REGION if different? -- cpu_model -> fetched on my Linux system but needs to be updated for other platforms in python code -- on_cloud -> yes if CLOUD_PROVIDER found, OK? -- extra -> what is this? -- tracking mode is a field in code carbon _tracking_mode, should we use this or M14/M15? - # CodeCarbon Tracking Fields Documentation This document provides a comprehensive explanation of how each key-value pair is extracted and what it represents in the context of the `CodeCarbon` emissions tracking script. 
From 36e8834d306a56beb719bdf4b771e722a35418f1 Mon Sep 17 00:00:00 2001 From: Luka Lafaye de Micheaux Date: Mon, 27 Jan 2025 10:26:34 +0100 Subject: [PATCH 09/13] Update parameters report path in table --- .../bash/codecarbonVersFormulaire.md | 70 +++++++++---------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md index 8ba8f8a..c67250b 100644 --- a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md +++ b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md @@ -225,41 +225,41 @@ This document provides a comprehensive explanation of how each key-value pair is ## Table of Key-Value Pairs -| **Key** | **What It Represents** | -|------------------------|---------------------------------------------------------------------------------------| -| `run_id` | A unique identifier for the current run, generated by the `CodeCarbon` tracker. | -| `timestamp` | The current date and time when the tracking data was extracted. | -| `project_name` | The name of the project being tracked (e.g., "Linear Regression Training"). | -| `duration` | The total time (in seconds) taken for the training process. | -| `emissions` | The total carbon emissions (in kg of CO₂) produced during the training. | -| `emissions_rate` | The rate of carbon emissions (in kg of CO₂ per second) during the training. | -| `cpu_power` | The power consumption (in kW) of the CPU during the training. | -| `gpu_power` | The power consumption (in kW) of the GPU during the training. | -| `ram_power` | The power consumption (in kW) of the RAM during the training. | -| `cpu_energy` | The total energy consumed (in kWh) by the CPU during the training. | -| `gpu_energy` | The total energy consumed (in kWh) by the GPU during the training. | -| `ram_energy` | The total energy consumed (in kWh) by the RAM during the training. 
| -| `energy_consumed` | The total energy consumed (in kWh) by the system during the training. | -| `country_name` | The name of the country where the training was executed. | -| `country_iso_code` | The ISO code of the country where the training was executed. | -| `region` | The region (e.g., state or province) where the training was executed. | -| `cloud_provider` | The cloud provider used for the training (if applicable). | -| `cloud_region` | The region of the cloud provider used for the training (if applicable). | -| `os` | The operating system on which the training was executed. | -| `python_version` | The version of Python used to run the script. | -| `codecarbon_version` | The version of the `CodeCarbon` library used for tracking. | -| `cpu_count` | The number of CPU cores available on the system. | -| `cpu_model` | The model name of the CPU used for the training. | -| `gpu_count` | The number of GPUs available on the system. | -| `gpu_model` | The model name of the GPU used for the training (if applicable). | -| `longitude` | The longitude of the location where the training was executed. | -| `latitude` | The latitude of the location where the training was executed. | -| `ram_total_size` | The total size of the RAM (in GB) available on the system. | -| `tracking_mode` | The mode used by `CodeCarbon` for tracking (e.g., "machine" for local tracking). | -| `on_cloud` | Indicates whether the training was executed on a cloud provider (Yes/No). | -| `pue` | The Power Usage Effectiveness (PUE) of the data center (if applicable). | -| `extra` | Additional information about the power measurement method used by `CodeCarbon`. | -| `kWh` | The unit of energy measurement (kilowatt-hours). 
| +| Key | What It Represents | Path | +|---------------------|----------------------------------------------------------------------------------|-----------------------------------------------| +| **run_id** | A unique identifier for the current run, generated by the CodeCarbon tracker. | `[header]reportId= H4` | +| **timestamp** | The current date and time when the tracking data was extracted. | `[header]reportDatetime= H5` | +| **project_name** | The name of the project being tracked (e.g., "Linear Regression Training"). | | +| **duration** | The total time (in seconds) taken for the training process. | `[measures](measures.1)measurementDuration = M113` | +| **emissions** | The total carbon emissions (in kg of CO₂) produced during the training. | | +| **emissions_rate** | The rate of carbon emissions (in kg of CO₂ per second) during the training. | | +| **cpu_power** | The power consumption (in kW) of the CPU during the training. | | +| **gpu_power** | The power consumption (in kW) of the GPU during the training. | | +| **ram_power** | The power consumption (in kW) of the RAM during the training. | | +| **cpu_energy** | The total energy consumed (in kWh) by the CPU during the training. | | +| **gpu_energy** | The total energy consumed (in kWh) by the GPU during the training. | | +| **ram_energy** | The total energy consumed (in kWh) by the RAM during the training. | | +| **energy_consumed** | The total energy consumed (in kWh) by the system during the training. | `[measures](measures.1)powerConsumption = M112` | +| **country_name** | The name of the country where the training was executed. | `[environment]country = E1` | +| **country_iso_code**| The ISO code of the country where the training was executed. | | +| **region** | The region (e.g., state or province) where the training was executed. | | +| **cloud_provider** | The cloud provider used for the training (if applicable). 
| `[infrastructure]cloudProvider = I2` | +| **cloud_region** | The region of the cloud provider used for the training (if applicable). | `[environment]country = E1` | +| **os** | The operating system on which the training was executed. | `[system]os = S1` | +| **python_version** | The version of Python used to run the script. | | +| **codecarbon_version** | The version of the CodeCarbon library used for tracking. | `[measures](measures.1)version = M13` | +| **cpu_count** | The number of CPU cores available on the system. | `[infrastructure](components.1)nbComponent = IC12` | +| **cpu_model** | The model name of the CPU used for the training. | `[infrastructure](components.1)componentName = IC11` | +| **gpu_count** | The number of GPUs available on the system. | `[infrastructure](components.2)nbComponent = IC12` | +| **gpu_model** | The model name of the GPU used for the training (if applicable). | `[infrastructure](components.2)componentName = IC11` | +| **longitude** | The longitude of the location where the training was executed. | `[environment]longitude = E3` | +| **latitude** | The latitude of the location where the training was executed. | `[environment]latitude = E2` | +| **ram_total_size** | The total size of the RAM (in GB) available on the system. | `[infrastructure](components.3)memorySize = IC13` | +| **tracking_mode** | The mode used by CodeCarbon for tracking (e.g., "machine" for local tracking). | `[measures](measures.1)cpuTrackingMode = M14`
`[measures](measures.1)gpuTrackingMode = M15` | +| **on_cloud** | Indicates whether the training was executed on a cloud provider (Yes/No). |`[infrastructure]infraType = I1`| +| **pue** | The Power Usage Effectiveness (PUE) of the data center (if applicable). | | +| **extra** | Additional information about the power measurement method used by CodeCarbon. | `[measures](measures.*)measurementMethod = M11` | +| **kWh** | The unit of energy measurement (kilowatt-hours). | `[measures](measures.*)unit = M19` | The table will be soon be updated with paths to variables in the report.txt generated using the bash script. --- From e0083ce251c958b493d496c5f947a050d2648cbc Mon Sep 17 00:00:00 2001 From: Luka lafaye de Micheaux Date: Tue, 28 Jan 2025 18:35:22 +0100 Subject: [PATCH 10/13] added remarks --- .../small-automation/bash/README.md | 2 + .../bash/codecarbonVersFormulaire.md | 190 ++++-------------- .../small-automation/bash/gen_form.sh | 0 3 files changed, 45 insertions(+), 147 deletions(-) mode change 100644 => 100755 tools/json_generator/small-automation/bash/gen_form.sh diff --git a/tools/json_generator/small-automation/bash/README.md b/tools/json_generator/small-automation/bash/README.md index 1ce7711..aebb0b2 100644 --- a/tools/json_generator/small-automation/bash/README.md +++ b/tools/json_generator/small-automation/bash/README.md @@ -1,3 +1,5 @@ +Some proposed changes can be seen in `codecarbonVersFormulaire.md`. + How to use these scripts to generate the json report following the ai power measurement sharing ? 1. 
Edit the form_Gen.conf to specify how many objects you want to generate in the form diff --git a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md index c67250b..2fd40dd 100644 --- a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md +++ b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md @@ -1,125 +1,27 @@ -# Conversion entre les champs CodeCarbon et les Champs du Datamodel +# Automatically fetch data from CodeCarbon ## Prerequisities If you are using a cloud server, add CLOUD_PROVIDER = "Provider" as a global environment variable on your system. Tested on CPU only! What is extra? -## Informations +## Information + +Install this package: https://github.com/lukalafaye/BoAmps_Carbon + +Usage example: training a regression model and saving Carbon data in BoAmps compatible csv using BoAmps_Carbon package. -Example of a python script to extract these fields while training a regression model: ```py +from BoAmps_Carbon.tracker import TrackerUtility + +tracker = TrackerUtility(project_name="My Experiment") +tracker.start_cracker() + + import torch import torch.nn as nn import torch.optim as optim -from codecarbon import EmissionsTracker -import platform -import os -import requests -import time -from datetime import datetime -import pkg_resources -import csv -import psutil # For cross-platform RAM and CPU info - -# Fetch CPU model -def get_cpu_model(): - system = platform.system() - try: - if system == "Linux": - if os.path.exists("/proc/cpuinfo"): - with open("/proc/cpuinfo", "r") as f: - for line in f: - if "model name" in line: - return line.split(":")[1].strip() - elif system == "Windows": - import wmi # Windows Management Instrumentation - c = wmi.WMI() - for processor in c.Win32_Processor(): - return processor.Name - elif system == "Darwin": # macOS - import subprocess - return subprocess.check_output(["sysctl", "-n", 
"machdep.cpu.brand_string"]).decode().strip() - except Exception as e: - print(f"Error fetching CPU model: {e}") - return None - -# Extract tracking fields -def extract_fields(tracker, emissions, duration): - def get_field_or_none(obj, attr, default=None): - return getattr(obj, attr, default) - - def get_location_info(): - try: - response = requests.get("http://ip-api.com/json/") - if response.status_code == 200: - data = response.json() - return { - "country_name": data.get("country"), - "country_iso_code": data.get("countryCode"), - "region": data.get("regionName"), - "longitude": data.get("lon"), - "latitude": data.get("lat"), - } - except Exception: - pass - return {"country_name": None, "country_iso_code": None, "region": None, "longitude": None, "latitude": None} - - try: - codecarbon_version = pkg_resources.get_distribution("codecarbon").version - except Exception: - codecarbon_version = None - - location_info = get_location_info() - cpu_power = get_field_or_none(tracker, "_cpu_power", 0) - gpu_power = get_field_or_none(tracker, "_gpu_power", 0) - ram_power = get_field_or_none(tracker, "_ram_power", 0) - duration_hours = duration / 3600 - - cpu_energy = cpu_power * duration_hours if cpu_power else None - gpu_energy = gpu_power * duration_hours if gpu_power else None - ram_energy = ram_power * duration_hours if ram_power else None - - # Get RAM size in GB - ram_total_size = round(psutil.virtual_memory().total / (1024**3), 2) - - fields = { - "run_id": get_field_or_none(tracker, "_experiment_id"), - "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "project_name": "Linear Regression Training", - "duration": duration, - "emissions": emissions, - "emissions_rate": emissions / duration if duration else None, - "cpu_power": cpu_power, - "gpu_power": gpu_power, - "ram_power": ram_power, - "cpu_energy": cpu_energy, - "gpu_energy": gpu_energy, - "ram_energy": ram_energy, - "energy_consumed": float(get_field_or_none(tracker, "_total_energy", 0)), - 
"country_name": location_info["country_name"], - "country_iso_code": location_info["country_iso_code"], - "region": location_info["region"], - "cloud_provider": os.environ.get("CLOUD_PROVIDER", "None"), - "cloud_region": os.environ.get("CLOUD_REGION", "None"), - "os": platform.system(), - "python_version": platform.python_version(), - "codecarbon_version": codecarbon_version, - "cpu_count": os.cpu_count(), - "cpu_model": get_cpu_model(), - "gpu_count": 0, - "gpu_model": None, - "longitude": location_info["longitude"], - "latitude": location_info["latitude"], - "ram_total_size": ram_total_size, - "tracking_mode": get_field_or_none(tracker, "_tracking_mode"), - "on_cloud": "Yes" if os.environ.get("CLOUD_PROVIDER") else "No", - "pue": get_field_or_none(tracker, "_pue", 1.0), - "extra": get_field_or_none(tracker, "_measure_power_method"), - "kWh": "kWh", - } - return fields # Generate synthetic data torch.manual_seed(42) @@ -144,13 +46,6 @@ model = LinearRegressionModel() criterion = nn.MSELoss() optimizer = optim.SGD(model.parameters(), lr=0.01) -# Initialize the CodeCarbon tracker -tracker = EmissionsTracker(project_name="Linear Regression Training") -tracker.start() - -# Measure training start time -start_time = time.time() - # Training loop num_epochs = 500 for epoch in range(num_epochs): @@ -162,22 +57,7 @@ for epoch in range(num_epochs): if (epoch + 1) % 50 == 0: print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}") -# Measure training end time and stop tracker -end_time = time.time() -training_duration = end_time - start_time -emissions = tracker.stop() - -# Extract tracking fields -fields = extract_fields(tracker, emissions, training_duration) - -# Write tracking fields to a CSV file -csv_file = "tracking_info.csv" -with open(csv_file, mode="w", newline="") as file: - writer = csv.DictWriter(file, fieldnames=fields.keys()) - writer.writeheader() - writer.writerow(fields) - -print(f"Tracking information saved to {csv_file}") 
+tracker.stop_tracker("tracking_info.csv") ``` Outputs a csv file containing these fields: @@ -227,26 +107,26 @@ This document provides a comprehensive explanation of how each key-value pair is | Key | What It Represents | Path | |---------------------|----------------------------------------------------------------------------------|-----------------------------------------------| -| **run_id** | A unique identifier for the current run, generated by the CodeCarbon tracker. | `[header]reportId= H4` | +| **run_id** | A unique identifier for the current run, generated by the CodeCarbon tracker. | Could be used for optional measure_id in [measures] | | **timestamp** | The current date and time when the tracking data was extracted. | `[header]reportDatetime= H5` | -| **project_name** | The name of the project being tracked (e.g., "Linear Regression Training"). | | +| **project_name** | The name of the project being tracked (e.g., "Linear Regression Training"). | Could be used for optional measure_name in [measures] | | **duration** | The total time (in seconds) taken for the training process. | `[measures](measures.1)measurementDuration = M113` | -| **emissions** | The total carbon emissions (in kg of CO₂) produced during the training. | | -| **emissions_rate** | The rate of carbon emissions (in kg of CO₂ per second) during the training. | | -| **cpu_power** | The power consumption (in kW) of the CPU during the training. | | -| **gpu_power** | The power consumption (in kW) of the GPU during the training. | | -| **ram_power** | The power consumption (in kW) of the RAM during the training. | | -| **cpu_energy** | The total energy consumed (in kWh) by the CPU during the training. | | -| **gpu_energy** | The total energy consumed (in kWh) by the GPU during the training. | | -| **ram_energy** | The total energy consumed (in kWh) by the RAM during the training. | | +| **emissions** | The total carbon emissions (in kg of CO₂) produced during the training. 
| NTBA [measures](measures.1) | +| **emissions_rate** | The rate of carbon emissions (in kg of CO₂ per second) during the training. | NTBA [measures](measures.1) | +| **cpu_power** | The power consumption (in kW) of the CPU during the training. | NTBA [measures](measures.1)cpu_powerConsumption | +| **gpu_power** | The power consumption (in kW) of the GPU during the training. | NTBA [measures](measures.1)gpu_powerConsumption | +| **ram_power** | The power consumption (in kW) of the RAM during the training. | NTBA [measures](measures.1)ram_powerConsumption | +| **cpu_energy** | The total energy consumed (in kWh) by the CPU during the training. | NTBA [measures](measures.1)cpu_energy | +| **gpu_energy** | The total energy consumed (in kWh) by the GPU during the training. | NTBA [measures](measures.1)gpu_energy | +| **ram_energy** | The total energy consumed (in kWh) by the RAM during the training. | NTBA [measures](measures.1)ram_energy | | **energy_consumed** | The total energy consumed (in kWh) by the system during the training. | `[measures](measures.1)powerConsumption = M112` | | **country_name** | The name of the country where the training was executed. | `[environment]country = E1` | -| **country_iso_code**| The ISO code of the country where the training was executed. | | -| **region** | The region (e.g., state or province) where the training was executed. | | +| **country_iso_code**| The ISO code of the country where the training was executed. | NTBA | +| **region** | The region (e.g., state or province) where the training was executed. | NTBA [environment]region | | **cloud_provider** | The cloud provider used for the training (if applicable). | `[infrastructure]cloudProvider = I2` | | **cloud_region** | The region of the cloud provider used for the training (if applicable). | `[environment]country = E1` | | **os** | The operating system on which the training was executed. 
| `[system]os = S1` | -| **python_version** | The version of Python used to run the script. | | +| **python_version** | The version of Python used to run the script. | [software]version and automatically fill [software]language to Python| | **codecarbon_version** | The version of the CodeCarbon library used for tracking. | `[measures](measures.1)version = M13` | | **cpu_count** | The number of CPU cores available on the system. | `[infrastructure](components.1)nbComponent = IC12` | | **cpu_model** | The model name of the CPU used for the training. | `[infrastructure](components.1)componentName = IC11` | @@ -257,7 +137,7 @@ This document provides a comprehensive explanation of how each key-value pair is | **ram_total_size** | The total size of the RAM (in GB) available on the system. | `[infrastructure](components.3)memorySize = IC13` | | **tracking_mode** | The mode used by CodeCarbon for tracking (e.g., "machine" for local tracking). | `[measures](measures.1)cpuTrackingMode = M14`
`[measures](measures.1)gpuTrackingMode = M15` | | **on_cloud** | Indicates whether the training was executed on a cloud provider (Yes/No). |`[infrastructure]infraType = I1`| -| **pue** | The Power Usage Effectiveness (PUE) of the data center (if applicable). | | +| **pue** | The Power Usage Effectiveness (PUE) of the data center (if applicable). | NTBA? | | **extra** | Additional information about the power measurement method used by CodeCarbon. | `[measures](measures.*)measurementMethod = M11` | | **kWh** | The unit of energy measurement (kilowatt-hours). | `[measures](measures.*)unit = M19` | @@ -314,6 +194,9 @@ Install the required Python packages using `pip`: ```bash pip install torch codecarbon psutil wmi +git clone https://github.com/lukalafaye/BoAmps_Carbon +cd BoAmps_Carbon +pip install . ``` --- @@ -339,3 +222,16 @@ Here are the recommended versions of the packages: --- + +## Remarks + +- NTBA in table means needs to be added in report, lots of variables need to be added as new fields in generated reports by bash script +- How are measures tied to tasks? Maybe add an optional id_measure in each task to create that connection? +- Measures should have a new field measure_name to explain what kind of task they are measuring... +- Maybe add `emissions` and `emissions_rate` fields in measure objects for carbon emissions? or do these belong in powerSourceCarbonIntensity? +- In measure objects, powerConsumption should be replaced by cpu, gpu, and ram consumption fields as cpu, gpu, and ram work on a task at the same time... +- ├averageUtilizationCpu and ├averageUtilizationGpu can be fetched using Carbon maybe? +- region NTBA in [environment] as well as all the other NTBA... +- The environment might be different for different measures -> on top of global variable for it in report, add measure_environment fields in each measure object... + +Lots of objects depend on a single measure ([system], [software], [infrastructure], [environment]...) 
ideally tasks should include measures as sub sections, which should include [system], [software], [infrastructure], [environment] as sub sections... \ No newline at end of file diff --git a/tools/json_generator/small-automation/bash/gen_form.sh b/tools/json_generator/small-automation/bash/gen_form.sh old mode 100644 new mode 100755 From 6618542fb8f950e4e85429594186950f2a469da5 Mon Sep 17 00:00:00 2001 From: Luka Lafaye de Micheaux Date: Wed, 29 Jan 2025 23:19:19 +0100 Subject: [PATCH 11/13] Update codecarbonVersFormulaire.md --- .../bash/codecarbonVersFormulaire.md | 132 ++++-------------- 1 file changed, 27 insertions(+), 105 deletions(-) diff --git a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md index 2fd40dd..2652606 100644 --- a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md +++ b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md @@ -1,66 +1,9 @@ # Automatically fetch data from CodeCarbon -## Prerequisities - -If you are using a cloud server, add CLOUD_PROVIDER = "Provider" as a global environment variable on your system. -Tested on CPU only! What is extra? - ## Information -Install this package: https://github.com/lukalafaye/BoAmps_Carbon - -Usage example: training a regression model and saving Carbon data in BoAmps compatible csv using BoAmps_Carbon package. 
- - -```py -from BoAmps_Carbon.tracker import TrackerUtility - -tracker = TrackerUtility(project_name="My Experiment") -tracker.start_cracker() - - -import torch -import torch.nn as nn -import torch.optim as optim - -# Generate synthetic data -torch.manual_seed(42) -n_samples = 100 -X = torch.rand(n_samples, 1) * 10 -true_slope = 2.5 -true_intercept = 1.0 -noise = torch.randn(n_samples, 1) * 2 -y = true_slope * X + true_intercept + noise - -# Define the linear regression model -class LinearRegressionModel(nn.Module): - def __init__(self): - super(LinearRegressionModel, self).__init__() - self.linear = nn.Linear(1, 1) - - def forward(self, x): - return self.linear(x) - -# Initialize the model, loss function, and optimizer -model = LinearRegressionModel() -criterion = nn.MSELoss() -optimizer = optim.SGD(model.parameters(), lr=0.01) - -# Training loop -num_epochs = 500 -for epoch in range(num_epochs): - y_pred = model(X) - loss = criterion(y_pred, y) - optimizer.zero_grad() - loss.backward() - optimizer.step() - if (epoch + 1) % 50 == 0: - print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}") - -tracker.stop_tracker("tracking_info.csv") -``` - -Outputs a csv file containing these fields: +`example-carbon.py` trains a regression model and saves Carbon data in a BoAmps compatible csv using the [https://github.com/lukalafaye/BoAmps_Carbon](https://github.com/lukalafaye/BoAmps_Carbon) package. +It will output a CSV file containing these fields: ``` run_id: 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 @@ -93,13 +36,12 @@ ram_total_size: 13.49 tracking_mode: machine on_cloud: No pue: 1.0 -extra: None kWh: kWh ``` -# CodeCarbon Tracking Fields Documentation +## Tracking Fields Documentation -This document provides a comprehensive explanation of how each key-value pair is extracted and what it represents in the context of the `CodeCarbon` emissions tracking script. 
+This table provides a comprehensive explanation of how each key-value pair is extracted and what it represents in the context of the `CodeCarbon` emissions tracking script. --- @@ -108,9 +50,9 @@ This document provides a comprehensive explanation of how each key-value pair is | Key | What It Represents | Path | |---------------------|----------------------------------------------------------------------------------|-----------------------------------------------| | **run_id** | A unique identifier for the current run, generated by the CodeCarbon tracker. | Could be used for optional measure_id in [measures] | -| **timestamp** | The current date and time when the tracking data was extracted. | `[header]reportDatetime= H5` | +| **timestamp** | The current date and time when the tracking data was extracted. | `[header]reportDatetime` | | **project_name** | The name of the project being tracked (e.g., "Linear Regression Training"). | Could be used for optional measure_name in [measures] | -| **duration** | The total time (in seconds) taken for the training process. | `[measures](measures.1)measurementDuration = M113` | +| **duration** | The total time (in seconds) taken for the training process. | `[measures](measures.1)measurementDuration` | | **emissions** | The total carbon emissions (in kg of CO₂) produced during the training. | NTBA [measures](measures.1) | | **emissions_rate** | The rate of carbon emissions (in kg of CO₂ per second) during the training. | NTBA [measures](measures.1) | | **cpu_power** | The power consumption (in kW) of the CPU during the training. | NTBA [measures](measures.1)cpu_powerConsumption | @@ -119,26 +61,23 @@ This document provides a comprehensive explanation of how each key-value pair is | **cpu_energy** | The total energy consumed (in kWh) by the CPU during the training. | NTBA [measures](measures.1)cpu_energy | | **gpu_energy** | The total energy consumed (in kWh) by the GPU during the training. 
| NTBA [measures](measures.1)gpu_energy | | **ram_energy** | The total energy consumed (in kWh) by the RAM during the training. | NTBA [measures](measures.1)ram_energy | -| **energy_consumed** | The total energy consumed (in kWh) by the system during the training. | `[measures](measures.1)powerConsumption = M112` | -| **country_name** | The name of the country where the training was executed. | `[environment]country = E1` | +| **energy_consumed** | The total energy consumed (in kWh) by the system during the training. | `[measures](measures.1)powerConsumption` | +| **country_name** | The name of the country where the training was executed. | `[environment]country` | | **country_iso_code**| The ISO code of the country where the training was executed. | NTBA | | **region** | The region (e.g., state or province) where the training was executed. | NTBA [environment]region | -| **cloud_provider** | The cloud provider used for the training (if applicable). | `[infrastructure]cloudProvider = I2` | -| **cloud_region** | The region of the cloud provider used for the training (if applicable). | `[environment]country = E1` | -| **os** | The operating system on which the training was executed. | `[system]os = S1` | +| **cloud_region** | The region of the cloud provider used for the training (if applicable). | `[environment]country` | +| **os** | The operating system on which the training was executed. | `[system]os` | | **python_version** | The version of Python used to run the script. | [software]version and automatically fill [software]language to Python| -| **codecarbon_version** | The version of the CodeCarbon library used for tracking. | `[measures](measures.1)version = M13` | -| **cpu_count** | The number of CPU cores available on the system. | `[infrastructure](components.1)nbComponent = IC12` | -| **cpu_model** | The model name of the CPU used for the training. 
| `[infrastructure](components.1)componentName = IC11` | -| **gpu_count** | The number of GPUs available on the system. | `[infrastructure](components.2)nbComponent = IC12` | -| **gpu_model** | The model name of the GPU used for the training (if applicable). | `[infrastructure](components.2)componentName = IC11` | -| **longitude** | The longitude of the location where the training was executed. | `[environment]longitude = E3` | -| **latitude** | The latitude of the location where the training was executed. | `[environment]latitude = E2` | -| **ram_total_size** | The total size of the RAM (in GB) available on the system. | `[infrastructure](components.3)memorySize = IC13` | -| **tracking_mode** | The mode used by CodeCarbon for tracking (e.g., "machine" for local tracking). | `[measures](measures.1)cpuTrackingMode = M14`
`[measures](measures.1)gpuTrackingMode = M15` | -| **on_cloud** | Indicates whether the training was executed on a cloud provider (Yes/No). |`[infrastructure]infraType = I1`| +| **codecarbon_version** | The version of the CodeCarbon library used for tracking. | `[measures](measures.1)version` | +| **cpu_count** | The number of CPU cores available on the system. | `[infrastructure](components.1)nbComponent` | +| **cpu_model** | The model name of the CPU used for the training. | `[infrastructure](components.1)componentName` | +| **gpu_count** | The number of GPUs available on the system. | `[infrastructure](components.2)nbComponent` | +| **gpu_model** | The model name of the GPU used for the training (if applicable). | `[infrastructure](components.2)componentName` | +| **longitude** | The longitude of the location where the training was executed. | `[environment]longitude` | +| **latitude** | The latitude of the location where the training was executed. | `[environment]latitude` | +| **ram_total_size** | The total size of the RAM (in GB) available on the system. | `[infrastructure](components.3)memorySize` | +| **tracking_mode** | The mode used by CodeCarbon for tracking (e.g., "machine" for local tracking). | `[measures](measures.1)cpuTrackingMode`
`[measures](measures.1)gpuTrackingMode` | | **pue** | The Power Usage Effectiveness (PUE) of the data center (if applicable). | NTBA? | -| **extra** | Additional information about the power measurement method used by CodeCarbon. | `[measures](measures.*)measurementMethod = M11` | | **kWh** | The unit of energy measurement (kilowatt-hours). | `[measures](measures.*)unit = M19` | The table will be soon be updated with paths to variables in the report.txt generated using the bash script. @@ -162,8 +101,6 @@ The table will be soon be updated with paths to variables in the report.txt gene 14. **`country_name`**: Retrieved from the `ip-api.com` JSON response. 15. **`country_iso_code`**: Retrieved from the `ip-api.com` JSON response. 16. **`region`**: Retrieved from the `ip-api.com` JSON response. -17. **`cloud_provider`**: Retrieved from the environment variable `CLOUD_PROVIDER`. -18. **`cloud_region`**: Retrieved from the environment variable `CLOUD_REGION`. 19. **`os`**: Retrieved using `platform.system()`. 20. **`python_version`**: Retrieved using `platform.python_version()`. 21. **`codecarbon_version`**: Retrieved using `pkg_resources.get_distribution("codecarbon").version`. @@ -175,42 +112,28 @@ The table will be soon be updated with paths to variables in the report.txt gene 27. **`latitude`**: Retrieved from the `ip-api.com` JSON response. 28. **`ram_total_size`**: Calculated using `os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") / (1024**3)`. 29. **`tracking_mode`**: Extracted from the tracker using `get_field_or_none(tracker, "_tracking_mode")`. -30. **`on_cloud`**: Determined by checking if the `CLOUD_PROVIDER` environment variable is set. 31. **`pue`**: Extracted from the tracker using `get_field_or_none(tracker, "_pue")`. -32. **`extra`**: Extracted from the tracker using `get_field_or_none(tracker, "_measure_power_method")`. 33. **`kWh`**: Hardcoded as "kWh" (unit of energy measurement). 
--- -## Requirements - -To run this script, you need the following dependencies installed: +## Run Python example ---- - -### Python Packages +### Prerequisites Install the required Python packages using `pip`: ```bash -pip install torch codecarbon psutil wmi +pip install -r requirements.txt git clone https://github.com/lukalafaye/BoAmps_Carbon cd BoAmps_Carbon pip install . ``` ---- - -### Package Versions - -Here are the recommended versions of the packages: - -| **Package** | **Version** | **Description** | -|---------------|-------------|---------------------------------------------------------------------------------| -| `torch` | `>=2.0.0` | PyTorch library for deep learning. | -| `codecarbon` | `>=2.0.0` | Library for tracking carbon emissions. | -| `psutil` | `>=5.8.0` | Cross-platform library for retrieving system information (CPU, RAM, etc.). | -| `wmi` | `>=1.5.1` | Windows Management Instrumentation library (required only on Windows). | +Run the Python script: +```bash +python example-carbon.py +``` --- @@ -220,7 +143,6 @@ Here are the recommended versions of the packages: - **Windows**: The `wmi` library requires the `pywin32` package, which is installed automatically with `wmi`. - **macOS**: No additional dependencies required. - --- ## Remarks @@ -234,4 +156,4 @@ Here are the recommended versions of the packages: - region NTBA in [environment] as well as all the other NTBA... - The environment might be different for different measures -> on top of global variable for it in report, add measure_environment fields in each measure object... -Lots of objects depend on a single measure ([system], [software], [infrastructure], [environment]...) ideally tasks should include measures as sub sections, which should include [system], [software], [infrastructure], [environment] as sub sections... \ No newline at end of file +Lots of objects depend on a single measure ([system], [software], [infrastructure], [environment]...)
ideally tasks should include measures as sub sections, which should include [system], [software], [infrastructure], [environment] as sub sections... From d0caeada629c4edb219825c30a6f7cb1bf04fcc2 Mon Sep 17 00:00:00 2001 From: Luka Lafaye de Micheaux Date: Wed, 29 Jan 2025 23:19:44 +0100 Subject: [PATCH 12/13] Create requirements.txt --- tools/json_generator/small-automation/bash/requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tools/json_generator/small-automation/bash/requirements.txt diff --git a/tools/json_generator/small-automation/bash/requirements.txt b/tools/json_generator/small-automation/bash/requirements.txt new file mode 100644 index 0000000..3284eea --- /dev/null +++ b/tools/json_generator/small-automation/bash/requirements.txt @@ -0,0 +1,4 @@ +torch +codecarbon +psutil +wmi From 14d5f6a28f625953a82fb7e995e99915c769b46f Mon Sep 17 00:00:00 2001 From: Luka Lafaye de Micheaux Date: Wed, 29 Jan 2025 23:20:01 +0100 Subject: [PATCH 13/13] Create example-carbon.py --- .../small-automation/bash/example-carbon.py | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tools/json_generator/small-automation/bash/example-carbon.py diff --git a/tools/json_generator/small-automation/bash/example-carbon.py b/tools/json_generator/small-automation/bash/example-carbon.py new file mode 100644 index 0000000..2f9a899 --- /dev/null +++ b/tools/json_generator/small-automation/bash/example-carbon.py @@ -0,0 +1,45 @@ +from BoAmps_Carbon.tracker import TrackerUtility + +tracker = TrackerUtility(project_name="My Experiment") +tracker.start_cracker() + + +import torch +import torch.nn as nn +import torch.optim as optim + +# Generate synthetic data +torch.manual_seed(42) +n_samples = 100 +X = torch.rand(n_samples, 1) * 10 +true_slope = 2.5 +true_intercept = 1.0 +noise = torch.randn(n_samples, 1) * 2 +y = true_slope * X + true_intercept + noise + +# Define the linear regression model +class LinearRegressionModel(nn.Module): + def 
__init__(self): + super(LinearRegressionModel, self).__init__() + self.linear = nn.Linear(1, 1) + + def forward(self, x): + return self.linear(x) + +# Initialize the model, loss function, and optimizer +model = LinearRegressionModel() +criterion = nn.MSELoss() +optimizer = optim.SGD(model.parameters(), lr=0.01) + +# Training loop +num_epochs = 500 +for epoch in range(num_epochs): + y_pred = model(X) + loss = criterion(y_pred, y) + optimizer.zero_grad() + loss.backward() + optimizer.step() + if (epoch + 1) % 50 == 0: + print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}") + +tracker.stop_tracker("tracking_info.csv")