Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 07c1d19

Browse files
committedFeb 14, 2025··
fix(agent): solve permission denied when asserting submit dir exist (#713)
The previous approach with submit_dir.exists() runs as the agent user, which may face permission errors on the directory (like /home/fschuch). It is now verified as the submitter user. (cherry picked from commit 5f6beb7)
1 parent dab8c11 commit 07c1d19

File tree

3 files changed

+84
-11
lines changed

3 files changed

+84
-11
lines changed
 

‎changes/agent/713.fixed.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Solved the permissions denied error the agent user could face at submission time when verifying the submission directory exists and is writable

‎jobbergate-agent/jobbergate_agent/jobbergate/submit.py

+31-7
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,16 @@
55
import pwd
66
from dataclasses import dataclass
77
from pathlib import Path
8+
from subprocess import CompletedProcess
89
from tempfile import TemporaryDirectory
910

1011
from buzz import DoExceptParams, handle_errors_async
11-
from jobbergate_core.tools.sbatch import InfoHandler, SubmissionHandler, SubprocessHandler, inject_sbatch_params
12+
from jobbergate_core.tools.sbatch import (
13+
InfoHandler,
14+
SubmissionHandler,
15+
SubprocessHandler,
16+
inject_sbatch_params,
17+
)
1218
from loguru import logger
1319

1420
from jobbergate_agent.clients.cluster_api import backend_client as jobbergate_api_client
@@ -173,7 +179,7 @@ def __post_init__(self):
173179
self.uid = pwan.pw_uid
174180
self.gid = pwan.pw_gid
175181

176-
def run(self, *args, **kwargs):
182+
def run(self, *args, **kwargs) -> CompletedProcess:
177183
kwargs.update(user=self.uid, group=self.gid, env={})
178184
# Tests indicate that the change on the working directory precedes the change of user on the subprocess.
179185
# With that, the user running the agent can face permission denied errors on cwd,
@@ -184,6 +190,24 @@ def run(self, *args, **kwargs):
184190
return super().run(*args, **kwargs)
185191

186192

193+
def validate_submit_dir(submit_dir: Path, subprocess_handler: SubprocessAsUserHandler) -> None:
194+
"""
195+
Validate the submission directory.
196+
197+
The directory must exist and be writable by the user, so this verification is delegated to the subprocess
198+
handler as the user that will run the sbatch command.
199+
200+
This is needed since `submit_dir.exists()` would run as the agent user, which may face permission errors.
201+
"""
202+
if not submit_dir.is_absolute():
203+
raise ValueError("Execution directory must be an absolute path")
204+
try:
205+
subprocess_handler.run(cmd=("test", "-d", submit_dir.as_posix()))
206+
subprocess_handler.run(cmd=("test", "-w", submit_dir.as_posix()))
207+
except RuntimeError as e:
208+
raise ValueError("Execution directory does not exist or is not writable by the user") from e
209+
210+
187211
async def submit_job_script(
188212
pending_job_submission: PendingJobSubmission,
189213
user_mapper: SlurmUserMapper,
@@ -221,10 +245,10 @@ async def _reject_handler(params: DoExceptParams) -> None:
221245
subprocess_handler = SubprocessAsUserHandler(username)
222246

223247
submit_dir = pending_job_submission.execution_directory or SETTINGS.DEFAULT_SLURM_WORK_DIR
224-
if not submit_dir.exists() or not submit_dir.is_absolute():
225-
message = f"The execution directory must exist and be an absolute path, but got '{submit_dir.as_posix()}'"
226-
await mark_as_rejected(pending_job_submission.id, message)
227-
raise JobSubmissionError(message)
248+
async with handle_errors_async(
249+
"Execution directory is invalid", raise_exc_class=JobSubmissionError, do_except=_reject_handler
250+
):
251+
validate_submit_dir(submit_dir, subprocess_handler)
228252

229253
sbatch_handler = SubmissionHandler(
230254
sbatch_path=SETTINGS.SBATCH_PATH,
@@ -277,7 +301,7 @@ async def submit_pending_jobs() -> None:
277301
for pending_job_submission in pending_job_submissions:
278302
logger.debug(f"Submitting pending job_submission {pending_job_submission.id}")
279303
with JobSubmissionError.handle_errors(
280-
(f"Failed to submit pending job_submission {pending_job_submission.id}" "...skipping to next pending job"),
304+
f"Failed to submit pending job_submission {pending_job_submission.id}...skipping to next pending job",
281305
do_except=log_error,
282306
do_else=lambda: logger.debug(f"Finished submitting pending job_submission {pending_job_submission.id}"),
283307
re_raise=False,

‎jobbergate-agent/tests/jobbergate/test_submit.py

+52-4
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from jobbergate_agent.jobbergate.schemas import JobScriptFile, PendingJobSubmission, SlurmJobData
1616
from jobbergate_agent.jobbergate.submit import (
17+
SubprocessAsUserHandler,
1718
fetch_pending_submissions,
1819
get_job_script_file,
1920
mark_as_rejected,
@@ -22,6 +23,7 @@
2223
retrieve_submission_file,
2324
submit_job_script,
2425
submit_pending_jobs,
26+
validate_submit_dir,
2527
write_submission_file,
2628
)
2729
from jobbergate_agent.settings import SETTINGS
@@ -403,6 +405,52 @@ async def test_mark_as_rejected__raises_JobbergateApiError_if_the_response_is_no
403405
assert update_route.called
404406

405407

408+
class TestValidateSubmitDir:
409+
"""
410+
Test the ``validate_submit_dir()`` function.
411+
"""
412+
413+
subprocess_handler = SubprocessAsUserHandler(username=getpass.getuser())
414+
415+
def test_validate_submit_dir__success(self, tmp_path):
416+
"""
417+
Test that the function can successfully validate a submission directory.
418+
"""
419+
submit_dir = tmp_path / "submit"
420+
submit_dir.mkdir()
421+
422+
validate_submit_dir(submit_dir, self.subprocess_handler)
423+
424+
def test_validate_submit_dir__raises_exception_if_not_absolute_path(self):
425+
"""
426+
Test that the function raises an exception if the submission directory is not an absolute path.
427+
"""
428+
submit_dir = Path("./submit")
429+
430+
with pytest.raises(ValueError, match="must be an absolute path"):
431+
validate_submit_dir(submit_dir, self.subprocess_handler)
432+
433+
def test_validate_submit_dir__raises_exception_if_dir_is_not_writable(self, tmp_path):
434+
"""
435+
Test that the function raises an exception if the submission directory is not writable.
436+
"""
437+
submit_dir = tmp_path / "submit"
438+
submit_dir.mkdir()
439+
submit_dir.chmod(0o444)
440+
441+
with pytest.raises(ValueError, match="not writable by the user"):
442+
validate_submit_dir(submit_dir, self.subprocess_handler)
443+
444+
def test_validate_submit_dir__raises_exception_if_dir_does_not_exist(self, tmp_path):
445+
"""
446+
Test that the function raises an exception if the submission directory does not exist.
447+
"""
448+
submit_dir = tmp_path / "submit"
449+
450+
with pytest.raises(ValueError, match="Execution directory does not exist"):
451+
validate_submit_dir(submit_dir, self.subprocess_handler)
452+
453+
406454
@pytest.mark.asyncio
407455
@pytest.mark.usefixtures("mock_access_token")
408456
@pytest.mark.parametrize("job_submissions_name", ["dummy-job-submission", "really-long-job-submission-name" * 10])
@@ -553,12 +601,12 @@ async def test_submit_job_script__raises_exception_if_execution_dir_does_not_exi
553601
mocked_sbatch = mock.MagicMock()
554602
mocker.patch("jobbergate_agent.jobbergate.submit.SubmissionHandler", return_value=mocked_sbatch)
555603

556-
with pytest.raises(JobSubmissionError, match="The execution directory must exist and be an absolute path"):
604+
with pytest.raises(JobSubmissionError, match="Execution directory does not exist or is not writable by the user"):
557605
await submit_job_script(pending_job_submission, user_mapper)
558606

559607
mock_mark_as_rejected.assert_called_once_with(
560608
dummy_pending_job_submission_data["id"],
561-
RegexArgMatcher(".*The execution directory must exist and be an absolute path.*"),
609+
RegexArgMatcher(".*Execution directory does not exist or is not writable by the user.*"),
562610
)
563611

564612

@@ -577,12 +625,12 @@ async def test_submit_job_script__raises_exception_if_execution_dir_is_relative(
577625
mocked_sbatch = mock.MagicMock()
578626
mocker.patch("jobbergate_agent.jobbergate.submit.SubmissionHandler", return_value=mocked_sbatch)
579627

580-
with pytest.raises(JobSubmissionError, match="The execution directory must exist and be an absolute path"):
628+
with pytest.raises(JobSubmissionError, match="Execution directory must be an absolute path"):
581629
await submit_job_script(pending_job_submission, user_mapper)
582630

583631
mock_mark_as_rejected.assert_called_once_with(
584632
dummy_pending_job_submission_data["id"],
585-
RegexArgMatcher(".*The execution directory must exist and be an absolute path.*"),
633+
RegexArgMatcher(".*Execution directory must be an absolute path.*"),
586634
)
587635

588636

0 commit comments

Comments
 (0)
Please sign in to comment.