diff --git a/tools/json_generator/small-automation/bash/README.md b/tools/json_generator/small-automation/bash/README.md index 375ff06..aebb0b2 100644 --- a/tools/json_generator/small-automation/bash/README.md +++ b/tools/json_generator/small-automation/bash/README.md @@ -1,15 +1,58 @@ +Some proposition of changes can be seen in `codecarbonVersFormulaire.md`. + How to use these scripts to generate the json report following the ai power measurement sharing ? -1. Edit the form_gen.conf to specify how many objects you want to generate in the form +1. Edit the form_Gen.conf to specify how many objects you want to generate in the form + 2. Use the gen_form.sh script - ./gen_form.sh > + ./gen_form.sh > + generation type can be -a for all fields, -m for mandatory fields + is form_Gen.conf in this directory + can be a txt file + + Example: ./gen_form.sh -a form_Gen.conf > report1.txt + This script uses the references.param file which has been created manually from the json datamodel. Please don't modify this file. + It uses the parameters file to create the form with the desired number of objects instances. - The form is written in the filename.txt -3. Edit the filename.txt to complete the fields directly inside this file before using the second script. + The form is written in the . + +3. Edit the to complete the fields directly inside this file before using the second script. + 4. Transform this text file into our json model using the form2json.sh ./form2json.sh > You just created your first report ! troubleshooting : - if you have this error : "/bin/bash^M: bad interpreter: No such file or directory" -> use this command : sed -i -e 's/\r$//' + +Why is ├quantization mandatory? +├cpuTrackingMode what is this? +├gpuTrackingMode what is this? + +├averageUtilizationCpu how to get this, same for gpu? + + +Lots of optional fields make it annoying to create the report +├formatVersionSpecificationUri + + +What can be automated / improved? 
+ +├licensing → MULTI select common license or other +├formatVersion → always use same format for all documents 0.0.1 +├reportId → generate UUID automatically +├reportDatetime → generate date automatically +├reportStatus → MULTI draft, final, corrective, $other + + +├confidentialityLevel → MULTI public, internal, confidential, secret + +├publicKey generate automatically? + + +Can be automated in python: + ├os= + ├distribution= + ├distributionVersion= + diff --git a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md index 1400c1d..2652606 100644 --- a/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md +++ b/tools/json_generator/small-automation/bash/codecarbonVersFormulaire.md @@ -1,93 +1,159 @@ -# Conversion entre les champs CodeCarbon et les Champs du Datamodel - -## Informations - -Version : v0.1 - -Etat : Travail - -RAF : - -- Détailler les elemetns -- Affiner les réponses -- Choisir les numeros d'item judicieusement -- ... - -## Contenu - -Dans la suite les champs du datamodel son exprimé en fonction des éléments et valeurs du fichier form_example.txt - -**run_id** : [header]reportId= H4 - -**timestamp** : - -- \[header]├reportDatetime= H5 -- ET -- \[measures](measures.1)measurementDateTime= M114 - -**project_name** : - -**duration** : \[measures](measures.1)measurementDuration = M113 - -**emissions** : - -**emissions_rate** : - -**cpu_power** : - -**gpu_power** : - -**ram_power** : - -**cpu_energy** : - -**gpu_energy** : - -**ram_energy** : - -**energy_consumed** : \[measures](measures.1)powerConsumption = M112 - -**country_name** : \[environment]country = E1 - -**country_iso_code** : - -**region** : - -**cloud_provider** : \[infrastructure]cloudProvider = I2 - -**cloud_region** : - -**os** : \[system]os = S1 - -**python_version** : A METTRE DASN SOFTWARE ? 
- -**codecarbon_version** : \[measures](measures.1)version = M13 - -**cpu_count** : \[infrastructure](components.1)nbComponent = IC12 = "Intel(R) Xeon(R) Gold 6226R CPU @ 2.90GHz" ? - -**cpu_model** : \[infrastructure](components.1)componentName = IC11 - -**gpu_count** : \[infrastructure](components.2)nbComponent= IC12 - -**gpu_model** : \[infrastructure](components.2)componentName= IC11 = "2 x Tesla V100S-PCIE-32GB" - -**longitude** : \[environment]longitude = E3 - -**latitude** : \[environment]latitude = E2 - -**ram_total_size** : \[infrastructure](components.3)memorySize = IC13 - -**tracking_mode** : \[measures](measures.1) - -- cpuTrackingMode = M14 -- OU -- gpuTrackingMode= M15 ? - -**on_cloud** : - -**pue** : - -**Extra** : -codecarbon ==> \[measures](measures.*)measurementMethod = M11 - -**kWh** ==> \[measures](measures.*)unit= M19 +# Automatically fetch data from CodeCarbon + +## Information + +`example-carbon.py` trains a regression model and saves Carbon data in a BoAmps compatible csv using the [https://github.com/lukalafaye/BoAmps_Carbon](https://github.com/lukalafaye/BoAmps_Carbon) package. 
+It will output a CSV file containing these fields: + +``` +run_id: 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 +timestamp: 2025-01-16 10:25:02 +project_name: Linear Regression Training +duration: 0.1512455940246582 +emissions: 1.1263528505851737e-07 +emissions_rate: 7.447177934991875e-07 +cpu_power: Power(kW=0.0425) +gpu_power: Power(kW=0.0) +ram_power: Power(kW=0.0050578808784484865) +cpu_energy: Power(kW=1.785538262791104e-06) +gpu_energy: Power(kW=0.0) +ram_energy: Power(kW=2.1249505499080597e-07) +energy_consumed: 2.0099445932032577e-06 +country_name: France +country_iso_code: FR +region: Île-de-France +cloud_provider: None +os: Linux +python_version: 3.13.1 +codecarbon_version: 2.8.2 +cpu_count: 12 +cpu_model: AMD Ryzen 5 7530U with Radeon Graphics +gpu_count: 0 +gpu_model: None +longitude: 2.2463 +latitude: 48.7144 +ram_total_size: 13.49 +tracking_mode: machine +on_cloud: No +pue: 1.0 +kWh: kWh +``` + +## Tracking Fields Documentation + +This table provides a comprehensive explanation of how each key-value pair is extracted and what it represents in the context of the `CodeCarbon` emissions tracking script. + +--- + +## Table of Key-Value Pairs + +| Key | What It Represents | Path | +|---------------------|----------------------------------------------------------------------------------|-----------------------------------------------| +| **run_id** | A unique identifier for the current run, generated by the CodeCarbon tracker. | Could be used for optional measure_id in [measures] | +| **timestamp** | The current date and time when the tracking data was extracted. | `[header]reportDatetime` | +| **project_name** | The name of the project being tracked (e.g., "Linear Regression Training"). | Could be used for optional measure_name in [measures] | +| **duration** | The total time (in seconds) taken for the training process. | `[measures](measures.1)measurementDuration` | +| **emissions** | The total carbon emissions (in kg of CO₂) produced during the training. 
| NTBA [measures](measures.1) | +| **emissions_rate** | The rate of carbon emissions (in kg of CO₂ per second) during the training. | NTBA [measures](measures.1) | +| **cpu_power** | The power consumption (in kW) of the CPU during the training. | NTBA [measures](measures.1)cpu_powerConsumption | +| **gpu_power** | The power consumption (in kW) of the GPU during the training. | NTBA [measures](measures.1)gpu_powerConsumption | +| **ram_power** | The power consumption (in kW) of the RAM during the training. | NTBA [measures](measures.1)ram_powerConsumption | +| **cpu_energy** | The total energy consumed (in kWh) by the CPU during the training. | NTBA [measures](measures.1)cpu_energy | +| **gpu_energy** | The total energy consumed (in kWh) by the GPU during the training. | NTBA [measures](measures.1)gpu_energy | +| **ram_energy** | The total energy consumed (in kWh) by the RAM during the training. | NTBA [measures](measures.1)ram_energy | +| **energy_consumed** | The total energy consumed (in kWh) by the system during the training. | `[measures](measures.1)powerConsumption` | +| **country_name** | The name of the country where the training was executed. | `[environment]country` | +| **country_iso_code**| The ISO code of the country where the training was executed. | NTBA | +| **region** | The region (e.g., state or province) where the training was executed. | NTBA [environment]region | +| **cloud_region** | The region of the cloud provider used for the training (if applicable). | `[environment]country` | +| **os** | The operating system on which the training was executed. | `[system]os` | +| **python_version** | The version of Python used to run the script. | [software]version and automatically fill [software]language to Python| +| **codecarbon_version** | The version of the CodeCarbon library used for tracking. | `[measures](measures.1)version` | +| **cpu_count** | The number of CPU cores available on the system. 
| `[infrastructure](components.1)nbComponent` | +| **cpu_model** | The model name of the CPU used for the training. | `[infrastructure](components.1)componentName` | +| **gpu_count** | The number of GPUs available on the system. | `[infrastructure](components.2)nbComponent` | +| **gpu_model** | The model name of the GPU used for the training (if applicable). | `[infrastructure](components.2)componentName` | +| **longitude** | The longitude of the location where the training was executed. | `[environment]longitude` | +| **latitude** | The latitude of the location where the training was executed. | `[environment]latitude` | +| **ram_total_size** | The total size of the RAM (in GB) available on the system. | `[infrastructure](components.3)memorySize` | +| **tracking_mode** | The mode used by CodeCarbon for tracking (e.g., "machine" for local tracking). | `[measures](measures.1)cpuTrackingMode`
`[measures](measures.1)gpuTrackingMode` | +| **pue** | The Power Usage Effectiveness (PUE) of the data center (if applicable). | NTBA? | +| **kWh** | The unit of energy measurement (kilowatt-hours). | `[measures](measures.*)unit = M19` | + +The table will soon be updated with paths to variables in the report.txt generated using the bash script. +--- + +## How Each Key-Value Pair is Extracted + +1. **`run_id`**: Extracted from the `CodeCarbon` tracker object using `get_field_or_none(tracker, "_experiment_id")`. +2. **`timestamp`**: Generated using `datetime.now().strftime("%Y-%m-%d %H:%M:%S")`. +3. **`project_name`**: Hardcoded as "Linear Regression Training". +4. **`duration`**: Calculated as the difference between the training start and end times. +5. **`emissions`**: Retrieved directly from the `CodeCarbon` tracker after stopping it. +6. **`emissions_rate`**: Calculated as `emissions / duration`. +7. **`cpu_power`**: Extracted from the tracker using `get_field_or_none(tracker, "_cpu_power")`. +8. **`gpu_power`**: Extracted from the tracker using `get_field_or_none(tracker, "_gpu_power")`. +9. **`ram_power`**: Extracted from the tracker using `get_field_or_none(tracker, "_ram_power")`. +10. **`cpu_energy`**: Calculated as `cpu_power * (duration / 3600)`. +11. **`gpu_energy`**: Calculated as `gpu_power * (duration / 3600)`. +12. **`ram_energy`**: Calculated as `ram_power * (duration / 3600)`. +13. **`energy_consumed`**: Extracted from the tracker using `get_field_or_none(tracker, "_total_energy")`. +14. **`country_name`**: Retrieved from the `ip-api.com` JSON response. +15. **`country_iso_code`**: Retrieved from the `ip-api.com` JSON response. +16. **`region`**: Retrieved from the `ip-api.com` JSON response. +19. **`os`**: Retrieved using `platform.system()`. +20. **`python_version`**: Retrieved using `platform.python_version()`. +21. **`codecarbon_version`**: Retrieved using `pkg_resources.get_distribution("codecarbon").version`. +22. 
**`cpu_count`**: Retrieved using `os.cpu_count()`. +23. **`cpu_model`**: Retrieved using the `get_cpu_model()` function. +24. **`gpu_count`**: Hardcoded as `0` (no GPU used in this example). +25. **`gpu_model`**: Hardcoded as `None` (no GPU used in this example). +26. **`longitude`**: Retrieved from the `ip-api.com` JSON response. +27. **`latitude`**: Retrieved from the `ip-api.com` JSON response. +28. **`ram_total_size`**: Calculated using `os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") / (1024**3)`. +29. **`tracking_mode`**: Extracted from the tracker using `get_field_or_none(tracker, "_tracking_mode")`. +31. **`pue`**: Extracted from the tracker using `get_field_or_none(tracker, "_pue")`. +33. **`kWh`**: Hardcoded as "kWh" (unit of energy measurement). + +--- + +## Run the Python example + +### Prerequisites + +Install the required Python packages using `pip`: + +```bash +pip install -r requirements.txt +git clone https://github.com/lukalafaye/BoAmps_Carbon +cd BoAmps_Carbon +pip install . +``` + +Run the Python script: +```bash +python example-carbon.py +``` + +--- + +### Operating System Dependencies + +- **Linux**: No additional dependencies required. +- **Windows**: The `wmi` library requires the `pywin32` package, which is installed automatically with `wmi`. +- **macOS**: No additional dependencies required. + +--- + +## Remarks + +- NTBA in table means needs to be added in report, lots of variables need to be added as new fields in generated reports by bash script +- How are measures tied to tasks? Maybe add an optional id_measure in each task to create that connection? +- Measures should have a new field measure_name to explain what kind of task they are measuring... +- Maybe add `emissions` and `emissions_rate` fields in measure objects for carbon emissions? or do these belong in powerSourceCarbonIntensity? 
+- In measure objects, powerConsumption should be replaced by cpu, gpu, and ram consumption fields as cpu, gpu, and ram work on a task at the same time... +- ├averageUtilizationCpu and ├averageUtilizationGpu can be fetched using Carbon maybe? +- region NTBA in [environment] as well as all the other NTBA... +- The environment might be different for different measures -> on top of global variable for it in report, add measure_environment fields in each measure object... + +Lots of objects depend on a single measure ([system], [software], [infrastructure], [environment]...) ideally tasks should include measures as sub sections, which should include [system], [software], [infrastructure], [environment] as sub sections... diff --git a/tools/json_generator/small-automation/bash/example-carbon.py b/tools/json_generator/small-automation/bash/example-carbon.py new file mode 100644 index 0000000..2f9a899 --- /dev/null +++ b/tools/json_generator/small-automation/bash/example-carbon.py @@ -0,0 +1,45 @@ +from BoAmps_Carbon.tracker import TrackerUtility + +tracker = TrackerUtility(project_name="My Experiment") +tracker.start_cracker() + + +import torch +import torch.nn as nn +import torch.optim as optim + +# Generate synthetic data +torch.manual_seed(42) +n_samples = 100 +X = torch.rand(n_samples, 1) * 10 +true_slope = 2.5 +true_intercept = 1.0 +noise = torch.randn(n_samples, 1) * 2 +y = true_slope * X + true_intercept + noise + +# Define the linear regression model +class LinearRegressionModel(nn.Module): + def __init__(self): + super(LinearRegressionModel, self).__init__() + self.linear = nn.Linear(1, 1) + + def forward(self, x): + return self.linear(x) + +# Initialize the model, loss function, and optimizer +model = LinearRegressionModel() +criterion = nn.MSELoss() +optimizer = optim.SGD(model.parameters(), lr=0.01) + +# Training loop +num_epochs = 500 +for epoch in range(num_epochs): + y_pred = model(X) + loss = criterion(y_pred, y) + optimizer.zero_grad() + loss.backward() 
+ optimizer.step() + if (epoch + 1) % 50 == 0: + print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}") + +tracker.stop_tracker("tracking_info.csv") diff --git a/tools/json_generator/small-automation/bash/gen_form.sh b/tools/json_generator/small-automation/bash/gen_form.sh old mode 100644 new mode 100755 diff --git a/tools/json_generator/small-automation/bash/requirements.txt b/tools/json_generator/small-automation/bash/requirements.txt new file mode 100644 index 0000000..3284eea --- /dev/null +++ b/tools/json_generator/small-automation/bash/requirements.txt @@ -0,0 +1,4 @@ +torch +codecarbon +psutil +wmi