-
Notifications
You must be signed in to change notification settings - Fork 0
/
typosquatHunter.py
executable file
·124 lines (106 loc) · 3.67 KB
/
typosquatHunter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/bin/python3
import requests
import json
import jellyfish
import sys
import logging
# Directory that main() reads requirements.txt and the whitelist file from.
app_dir = "/app"

# Largest Levenshtein distance at which two package names count as look-alikes.
levenshtein_number = 1

# JSON document whose "packages" entry is used as the list of known names.
PyPi_list_url = (
    "https://raw.githubusercontent.com/"
    "vincepower/python-pypi-package-list/main/pypi-packages.json"
)

# getLogger() without a name returns the root logger.
logger = logging.getLogger()
def get_requirements(filename):
    """Return the package names listed in a pip requirements file.

    Blank lines, comment lines, pip option lines (``-r``, ``-e``,
    ``--index-url`` ...) and direct URL / local path requirements are
    skipped. For every remaining line only the project name is kept: version
    specifiers, extras, environment markers and inline comments are dropped.

    Args:
        filename: Path of the requirements file to read.

    Returns:
        A list of package names in file order (duplicates are preserved).

    Raises:
        SystemExit: With status 1 when ``filename`` does not exist.
    """
    # None of these characters can be part of a project name, so the first
    # one found marks where the name ends. This covers every specifier
    # operator (==, >=, <=, ~=, !=, <, >, ===), extras ("["), markers (";"),
    # direct references ("@") and inline comments.
    name_terminators = " \t,;=<>!~[(@#"
    packages = []
    try:
        with open(filename) as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                if line.startswith("-"):
                    # A pip command-line option, not a package.
                    continue
                end = len(line)
                for index, char in enumerate(line):
                    if char in name_terminators:
                        end = index
                        break
                name = line[:end]
                if not name or "/" in name or ":" in name:
                    # Empty name, URL or filesystem path: there is no PyPI
                    # project name to assess on this line.
                    continue
                packages.append(name)
    except FileNotFoundError:
        logger.error(f"File {filename} not found, please ensure you have the application's requirements.txt in the app_dir")
        sys.exit(1)
    return packages
def get_whitelist(filename):
    """Load the set of whitelisted package names from ``filename``.

    Each line is stripped of surrounding whitespace; empty lines and lines
    beginning with ``#`` are ignored.

    Args:
        filename: Path of the whitelist file.

    Returns:
        A set with one entry per remaining line. When the file does not
        exist an error is logged and an empty set is returned.
    """
    try:
        with open(filename) as handle:
            stripped_lines = (raw.strip() for raw in handle)
            return {
                entry
                for entry in stripped_lines
                if entry and not entry.startswith("#")
            }
    except FileNotFoundError:
        logger.error(f"No whitelist has been found, all packages will be checked")
        return set()
def get_packages():
    """Download the list of known PyPI package names.

    Fetches ``PyPi_list_url`` and returns the value stored under the
    ``"packages"`` key of the JSON response.

    Returns:
        The ``"packages"`` value from the response, or an empty list when the
        download fails, the server answers with an error status, or the
        payload is not JSON of the expected shape. Failures are logged.
    """
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(PyPi_list_url, timeout=30)
        response.raise_for_status()
        data = response.json()
        packages = data["packages"]
    except (
        requests.exceptions.RequestException,  # connection, timeout, HTTP error
        ValueError,  # body is not valid JSON
        KeyError,  # JSON object without a "packages" entry
        TypeError,  # JSON payload is not an object
    ) as err:
        logger.error("Could not retrieve the PyPI package list: %s", err)
        packages = []
    return packages
def get_downloads(package):
    """Return the last-month download count pypistats.org reports for a package.

    Args:
        package: Package name to look up.

    Returns:
        The integer found at ``data.last_month`` in the pypistats "recent"
        response. ``0`` is returned when the request fails, the status code
        is not 200, or the payload does not contain an integer at that
        location.
    """
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(
            f"https://pypistats.org/api/packages/{package}/recent",
            timeout=15,
        )
    except requests.exceptions.RequestException as err:
        logger.warning("Could not fetch download stats for %s: %s", package, err)
        return 0

    if response.status_code != 200:
        return 0

    try:
        downloads = response.json()["data"]["last_month"]
    except (ValueError, KeyError, TypeError):
        # ValueError: invalid JSON; KeyError/TypeError: unexpected structure.
        return 0

    # Callers compare the result with ">", so never hand back None or a string.
    return downloads if isinstance(downloads, int) else 0
def _canonical_name(name):
    """Fold a project name the way PEP 503 normalizes it.

    The name is lower-cased and every run of ``-``, ``_`` and ``.`` is
    collapsed into a single ``-``.
    """
    canonical = name.lower().replace("_", "-").replace(".", "-")
    while "--" in canonical:
        canonical = canonical.replace("--", "-")
    return canonical


def get_similar_packages(package, packages, levenshtein_number):
    """Return the entries of ``packages`` whose names closely resemble ``package``.

    Names are compared in their canonical (PEP 503) form, so spellings that
    differ only in case or in ``-``/``_``/``.`` count as the same project.

    Args:
        package: Name of the package being assessed.
        packages: Iterable of known package names to compare against.
        levenshtein_number: Maximum Levenshtein distance for a name to count
            as similar.

    Returns:
        A list of the matching entries, exactly as they appear in
        ``packages``. ``package`` itself, and other spellings of the same
        project, are never included.
    """
    target = _canonical_name(package)
    target_length = len(target)
    similar_packages = []
    for p in packages:
        candidate = _canonical_name(p)
        if candidate == target:
            # Same project: it cannot be a look-alike of itself.
            continue
        if abs(len(candidate) - target_length) > levenshtein_number:
            # The edit distance is at least the length difference, so the
            # comparatively expensive distance computation can be skipped.
            continue
        if jellyfish.levenshtein_distance(target, candidate) <= levenshtein_number:
            similar_packages.append(p)
    return similar_packages
def compare_downloads(package, similar_packages):
    """Find look-alike packages that are downloaded more often than ``package``.

    The download count of ``package`` is fetched once with ``get_downloads``
    and then compared with the count of every entry in ``similar_packages``.

    Args:
        package: Name of the package being assessed.
        similar_packages: Iterable of names that resemble ``package``.

    Returns:
        A dict mapping each look-alike whose download count is strictly
        greater than that of ``package`` to ``package``.
    """
    baseline = get_downloads(package)
    return {
        candidate: package
        for candidate in similar_packages
        if get_downloads(candidate) > baseline
    }
def main():
    """Check every requirement of the application for possible typosquatting.

    Reads ``requirements.txt`` and the optional whitelist from ``app_dir``,
    downloads the list of known package names, and for each requirement that
    is not whitelisted looks for similarly named packages with more
    downloads.

    Exits with status 1 when at least one suspicious requirement is found or
    when the package list could not be obtained, and with status 0 otherwise.
    """
    print("Parsing requirements")
    requirements = get_requirements(f"{app_dir}/requirements.txt")
    print("Parsing whitelist")
    whitelist = get_whitelist(f"{app_dir}/typosquathunterwhitelist.txt")
    print("Getting all packages")
    packages = get_packages()
    if not packages:
        # With nothing to compare against, no requirement can be assessed.
        # Reporting success here would be a false all-clear.
        logger.error(
            "The PyPI package list is empty or could not be retrieved; "
            "unable to check for typosquatting"
        )
        sys.exit(1)

    # (requirement, look-alike) pairs. A list is used instead of a dict keyed
    # by look-alike so that two requirements resembling the same popular
    # package are both reported.
    findings = []
    for package in requirements:
        if package in whitelist:
            print(
                f"Skipping the package {package} as it is included in the whitelist"
            )
            continue
        print(f"Assessing {package} for typosquatting")
        similar_packages = get_similar_packages(package, packages, levenshtein_number)
        suspicious = compare_downloads(package, similar_packages)
        findings.extend(
            (requirement, lookalike) for lookalike, requirement in suspicious.items()
        )

    if findings:
        print("\nWarning: The following packages may be typosquatted versions of genuine packages:")
        for requirement, lookalike in findings:
            print(f"- {requirement} may be imitating {lookalike}")
        sys.exit(1)

    print("No suspicious packages found.")
    sys.exit(0)


if __name__ == "__main__":
    main()