Skip to content

Commit

Permalink
Merge pull request #527 from juliendoutre/julien.doutre/github-action…
Browse files Browse the repository at this point in the history
…s-support

Add new Github Action ecosystem
  • Loading branch information
juliendoutre authored Feb 6, 2025
2 parents 8109e69 + 2343238 commit 2e7999d
Show file tree
Hide file tree
Showing 8 changed files with 130 additions and 20 deletions.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,21 @@ Metadata heuristics:
| typosquatting | Identify packages that are named closely to a highly popular package |


### GitHub Action

Source code heuristics:

| **Heuristic** | **Description** |
|:-------------:|:---------------:|
| npm-serialize-environment | Identify when a package serializes 'process.env' to exfiltrate environment variables |
| npm-obfuscation | Identify when a package uses a common obfuscation method often used by malware |
| npm-silent-process-execution | Identify when a package silently executes an executable |
| shady-links | Identify when a package contains a URL to a domain with a suspicious extension |
| npm-exec-base64 | Identify when a package dynamically executes code through 'eval' |
| npm-install-script | Identify when a package has a pre or post-install script automatically running commands |
| npm-steganography | Identify when a package retrieves hidden data from an image and executes it |
| npm-dll-hijacking | Identify when a malicious package manipulates a trusted application into loading a malicious DLL |
| npm-exfiltrate-sensitive-data | Identify when a package reads and exfiltrates sensitive data from the local system |
<!-- END_RULE_LIST -->

## Custom Rules
Expand Down
3 changes: 3 additions & 0 deletions guarddog/analyzer/metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from guarddog.analyzer.metadata.npm import NPM_METADATA_RULES
from guarddog.analyzer.metadata.pypi import PYPI_METADATA_RULES
from guarddog.analyzer.metadata.go import GO_METADATA_RULES
from guarddog.analyzer.metadata.github_action import GITHUB_ACTION_METADATA_RULES
from guarddog.ecosystems import ECOSYSTEM


Expand All @@ -13,3 +14,5 @@ def get_metadata_detectors(ecosystem: ECOSYSTEM) -> dict[str, Detector]:
return NPM_METADATA_RULES
case ECOSYSTEM.GO:
return GO_METADATA_RULES
case ECOSYSTEM.GITHUB_ACTION:
return GITHUB_ACTION_METADATA_RULES
11 changes: 11 additions & 0 deletions guarddog/analyzer/metadata/github_action/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from typing import Type

from guarddog.analyzer.metadata import Detector

# Metadata detector classes that apply to the GitHub Action ecosystem.
# The list is empty for now: no metadata heuristic is registered for this ecosystem.
classes: list[Type[Detector]] = []

# Registry of detector instances, keyed by the name each detector reports via get_name().
GITHUB_ACTION_METADATA_RULES: dict[str, Detector] = {
    detector.get_name(): detector
    for detector in (detector_class() for detector_class in classes)  # type: ignore
}
42 changes: 22 additions & 20 deletions guarddog/analyzer/sourcecode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,34 +71,36 @@ def get_sourcecode_rules(
data = yaml.load(fd, Loader=SafeLoader)
for rule in data["rules"]:
for lang in rule["languages"]:
ecosystem = None
ecosystems = set()
match lang:
case "python":
ecosystem = ECOSYSTEM.PYPI
ecosystems.add(ECOSYSTEM.PYPI)
case "javascript" | "typescript" | "json":
ecosystem = ECOSYSTEM.NPM
ecosystems.add(ECOSYSTEM.NPM)
ecosystems.add(ECOSYSTEM.GITHUB_ACTION)
case "go":
ecosystem = ECOSYSTEM.GO
ecosystems.add(ECOSYSTEM.GO)
case _:
continue

# avoids duplicates when multiple languages are supported by a rule
if not next(
filter(
lambda r: r.id == rule["id"],
get_sourcecode_rules(ecosystem, SempgrepRule),
),
None,
):
SOURCECODE_RULES.append(
SempgrepRule(
id=rule["id"],
ecosystem=ecosystem,
description=rule.get("metadata", {}).get("description", ""),
file=file_name,
rule_content=rule,
for ecosystem in ecosystems:
# avoids duplicates when multiple languages are supported by a rule
if not next(
filter(
lambda r: r.id == rule["id"],
get_sourcecode_rules(ecosystem, SempgrepRule),
),
None,
):
SOURCECODE_RULES.append(
SempgrepRule(
id=rule["id"],
ecosystem=ecosystem,
description=rule.get("metadata", {}).get("description", ""),
file=file_name,
rule_content=rule,
)
)
)

yara_rule_file_names = list(
filter(lambda x: x.endswith("yar"), os.listdir(current_dir))
Expand Down
3 changes: 3 additions & 0 deletions guarddog/ecosystems.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ class ECOSYSTEM(Enum):
PYPI = "pypi"
NPM = "npm"
GO = "go"
GITHUB_ACTION = "github-action"


def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
Expand All @@ -15,5 +16,7 @@ def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
return "npm"
case ECOSYSTEM.GO:
return "go"
case ECOSYSTEM.GITHUB_ACTION:
return "GitHub Action"
case _:
return ecosystem.value
3 changes: 3 additions & 0 deletions guarddog/scanners/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .pypi_project_scanner import PypiRequirementsScanner
from .go_package_scanner import GoModuleScanner
from .go_project_scanner import GoDependenciesScanner
from .github_action_scanner import GithubActionScanner
from .scanner import PackageScanner, ProjectScanner
from ..ecosystems import ECOSYSTEM

Expand All @@ -29,6 +30,8 @@ def get_package_scanner(ecosystem: ECOSYSTEM) -> Optional[PackageScanner]:
return NPMPackageScanner()
case ECOSYSTEM.GO:
return GoModuleScanner()
case ECOSYSTEM.GITHUB_ACTION:
return GithubActionScanner()
return None


Expand Down
51 changes: 51 additions & 0 deletions guarddog/scanners/github_action_scanner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import logging
import os
import pathlib
import typing
from urllib.parse import urlparse

from guarddog.analyzer.analyzer import Analyzer
from guarddog.ecosystems import ECOSYSTEM
from guarddog.scanners.scanner import PackageScanner

log = logging.getLogger("guarddog")


class GithubActionScanner(PackageScanner):
    """Package scanner for GitHub Actions hosted on github.com.

    An action is identified either by its ``owner/repo`` name or by the URL of
    its GitHub repository. Its source code is fetched as a zip archive.
    """

    def __init__(self) -> None:
        super().__init__(Analyzer(ECOSYSTEM.GITHUB_ACTION))

    def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> typing.Tuple[dict, str]:
        """Download the source archive of a GitHub Action.

        Args:
            directory: Directory in which the archive is stored.
            package_name: ``owner/repo`` name or github.com URL of the action.
            version: Optional tag name; when falsy, the URL built by
                ``_get_git_tarball_url`` carries no ref at all.

        Returns:
            A tuple made of an empty dict (no metadata is collected here) and
            the path handed to ``download_compressed`` as its third argument
            (presumably the extraction directory -- that helper is defined on
            the base class and not visible here).

        Raises:
            ValueError: If ``package_name`` does not designate a GitHub repository.
        """
        repo = self._get_repo(package_name)
        tarball_url = self._get_git_tarball_url(repo, version)

        log.debug("Downloading GitHub Action source from %s", tarball_url)

        # The version-less API URL ends in "zipball" and therefore has no suffix,
        # so fall back to ".zip" in that case.
        file_extension = pathlib.Path(tarball_url).suffix or ".zip"

        # The local name is derived from the raw package name (not the parsed repo),
        # with slashes flattened so that it forms a single path component.
        zippath = os.path.join(directory, package_name.replace("/", "-") + file_extension)
        unzippedpath = zippath.removesuffix(file_extension)
        self.download_compressed(tarball_url, zippath, unzippedpath)

        return {}, unzippedpath

    def _get_repo(self, url: str) -> str:
        """Extract the ``owner/repo`` identifier from a repository name or URL.

        Accepts inputs such as ``actions/checkout``,
        ``https://github.com/actions/checkout`` or
        ``https://github.com/actions/checkout.git/``.

        Raises:
            ValueError: If the URL points to a host other than github.com, or if
                its path is not made of exactly two non-empty segments.
        """
        parsed_url = urlparse(url)

        # A bare "owner/repo" name has no hostname and is accepted as is.
        if parsed_url.hostname and parsed_url.hostname != "github.com":
            raise ValueError("Invalid GitHub repo URL: " + url)

        # Strip the surrounding slashes *before* dropping the ".git" suffix,
        # otherwise a trailing slash ("owner/repo.git/") would shield the suffix.
        path = parsed_url.path.strip("/").removesuffix(".git")

        # Exactly two non-empty segments are expected: the owner and the repository.
        segments = path.split("/")
        if len(segments) != 2 or not all(segments):
            raise ValueError("Invalid GitHub repo name: " + path)

        return path

    def _get_git_tarball_url(self, repo: str, version=None) -> str:
        """Build the download URL of the zip archive for ``repo``.

        Despite the method name, both URLs designate zip archives. Without a
        version, the GitHub API ``zipball`` endpoint is used with no ref;
        otherwise ``version`` is treated as a tag name.
        """
        if version:
            return f"https://github.com/{repo}/archive/refs/tags/{version}.zip"
        return f"https://api.github.com/repos/{repo}/zipball"
22 changes: 22 additions & 0 deletions tests/core/test_github_action_scanner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import os.path
import tempfile

import pytest

from guarddog.scanners import GithubActionScanner


def test_download_and_get_github_action_by_url():
    """Download an action designated by its repository URL (requires network access)."""
    scanner = GithubActionScanner()
    with tempfile.TemporaryDirectory() as tmpdirname:
        data, path = scanner.download_and_get_package_info(
            tmpdirname, "https://github.com/actions/checkout.git", "v4.2.2"
        )
        assert not data
        # The directory name is the raw package name with slashes replaced by dashes.
        assert path == os.path.join(tmpdirname, "https:--github.com-actions-checkout.git")
        assert os.path.exists(os.path.join(path, "checkout-4.2.2", "package.json"))


def test_download_and_get_github_action_by_name():
    """Download an action designated by its ``owner/repo`` name (requires network access)."""
    scanner = GithubActionScanner()
    with tempfile.TemporaryDirectory() as tmpdirname:
        data, path = scanner.download_and_get_package_info(tmpdirname, "actions/checkout", "v4.2.2")
        assert not data
        # The directory name is the package name with slashes replaced by dashes.
        assert path == os.path.join(tmpdirname, "actions-checkout")
        assert os.path.exists(os.path.join(path, "checkout-4.2.2", "package.json"))

0 comments on commit 2e7999d

Please sign in to comment.