-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathhasher.py
50 lines (43 loc) · 1.52 KB
/
hasher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""
hashdir based on: https://github.com/cakepietoast/checksumdir
"""
import os
import hashlib
import re
class Hasher:
    """Compute SHA-1 hex digests for single files and for directory trees.

    All methods are classmethods, so no instance is needed:
    ``Hasher.sha1(path)`` dispatches on whether ``path`` is a directory.

    * A file is hashed as ``"blob <size>"`` + NUL byte + content, which is the
      framing Git uses for blob objects.
    * A directory is hashed by taking the plain SHA-1 of every non-hidden
      file's content beneath it, sorting those hex digests, and hashing the
      sorted sequence. File names and layout do not contribute to the result.
    """

    # Chunk size (in bytes) used when streaming file content into a hash.
    _BLOCK_SIZE = 64 * 1024

    @classmethod
    def sha1(cls, filepath):
        """Return the SHA-1 hex digest of ``filepath``.

        Uses :meth:`hashdir` when ``filepath`` is a directory and
        :meth:`hashfile` otherwise.
        """
        if os.path.isdir(filepath):
            return cls.hashdir(filepath)
        return cls.hashfile(filepath)

    @classmethod
    def hashfile(cls, filepath):
        """Return the SHA-1 hex digest of a file using blob framing.

        The digest covers ``"blob <len>"``, a NUL byte, then the raw bytes of
        the file, where ``<len>`` is the content length in decimal.

        Raises whatever ``open`` raises if the file cannot be read.
        """
        # The exact byte length has to be hashed before the content, so the
        # content is read in full first rather than streamed.
        with open(filepath, mode='rb') as f:
            content = f.read()
        digest = hashlib.sha1()
        digest.update(("blob %d\0" % len(content)).encode('utf-8'))
        digest.update(content)
        return digest.hexdigest()

    @classmethod
    def hashdir(cls, dirpath):
        """Return one SHA-1 hex digest summarising the files under ``dirpath``.

        Files and sub-directories whose name starts with ``.`` are skipped.
        Only entries *below* ``dirpath`` are tested for hidden-ness: the
        components of ``dirpath`` itself are never inspected, so paths such as
        ``/home/user/.cache/app`` or ``project/../other`` are hashed normally
        instead of silently yielding the digest of an empty file list.
        """
        digests = []
        for root, dirs, files in os.walk(dirpath, topdown=True):
            # With topdown=True, editing ``dirs`` in place stops os.walk from
            # descending into the removed entries, so hidden trees (e.g. .git)
            # are never traversed. Matching on names keeps this independent of
            # the platform's path separator.
            dirs[:] = [d for d in dirs if not d.startswith('.')]
            digests.extend(
                cls._dirfilehash(os.path.join(root, name))
                for name in files
                if not name.startswith('.')
            )
        return cls._reducehash(digests)

    @classmethod
    def _dirfilehash(cls, filepath):
        """Return the plain SHA-1 hex digest of a file's raw content.

        The file is read in ``_BLOCK_SIZE`` chunks so large files are not
        loaded into memory at once. No blob header is added here.
        """
        digest = hashlib.sha1()
        with open(filepath, 'rb') as fp:
            # iter() with a sentinel stops at the empty bytes read() returns
            # on end of file.
            for chunk in iter(lambda: fp.read(cls._BLOCK_SIZE), b''):
                digest.update(chunk)
        return digest.hexdigest()

    @classmethod
    def _reducehash(cls, hashlist):
        """Fold a collection of hex digest strings into one SHA-1 hex digest.

        The digests are sorted first, so the result does not depend on the
        order in which the files were discovered.
        """
        digest = hashlib.sha1()
        for hashvalue in sorted(hashlist):
            digest.update(hashvalue.encode('utf-8'))
        return digest.hexdigest()