#!/usr/bin/python
# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is reddit.
#
# The Original Developer is the Initial Developer. The Initial Developer of
# the Original Code is reddit Inc.
#
# All portions of the code written by reddit are Copyright (c) 2006-2015 reddit
# Inc. All Rights Reserved.
###############################################################################
"""Clean up the static files S3 bucket.
This script removes static files that are no longer used from the S3 bucket.
"""

import datetime
import itertools
import os
import subprocess

from pylons import g

import r2
from r2.lib.plugin import PluginLoader
from r2.lib.utils import read_static_file_config


def get_mature_files_on_s3(bucket):
    """Enumerate files currently on S3 that are older than one day."""
    minimum_age = datetime.timedelta(days=1)
    minimum_birthdate = datetime.datetime.utcnow() - minimum_age

    remote_files = {}
    for key in bucket.list():
        last_modified = datetime.datetime.strptime(key.last_modified,
                                                   "%Y-%m-%dT%H:%M:%S.%fZ")
        if last_modified < minimum_birthdate:
            remote_files[key.name] = key

    return remote_files
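
# Illustrative only: boto S3 keys report `last_modified` as an ISO-8601
# string, which is what the strptime format above expects. A quick sketch
# with a made-up timestamp:
#
#     >>> datetime.datetime.strptime("2015-06-01T12:34:56.000Z",
#     ...                            "%Y-%m-%dT%H:%M:%S.%fZ")
#     datetime.datetime(2015, 6, 1, 12, 34, 56)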


def _get_repo_source_static_files(package_root):
    """Yield the static files tracked by git under package_root's public/static."""
    static_file_root = os.path.join(package_root, "public", "static")

    old_root = os.getcwd()
    try:
        os.chdir(static_file_root)
    except OSError:
        # this repo has no static files!
        return

    try:
        git_files_string = subprocess.check_output([
            "git", "ls-tree", "-r", "--name-only", "HEAD", static_file_root])
        git_files = git_files_string.splitlines()
        prefix = os.path.commonprefix(git_files)

        for path in git_files:
            filename = path[len(prefix):]
            yield filename
    finally:
        os.chdir(old_root)
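
# Sketch of the prefix stripping above, with hypothetical file names: given
# `git ls-tree -r --name-only HEAD <root>` output such as
#
#     r2/public/static/reddit.css
#     r2/public/static/js/reddit.js
#
# os.path.commonprefix() is "r2/public/static/" (note: a character-wise
# prefix, not a path-aware one), so the loop yields "reddit.css" and
# "js/reddit.js".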


def get_source_static_files(plugins):
    """List all static files that are committed to the git repository."""
    package_root = os.path.dirname(r2.__file__)

    # oh "yield from", how i wish i had thee.
    for filename in _get_repo_source_static_files(package_root):
        yield filename

    for plugin in plugins:
        for filename in _get_repo_source_static_files(plugin.path):
            yield filename


def get_generated_static_files():
    """List all static files that are generated by the build process."""
    PluginLoader()  # ensure all the plugins put their statics in g.static_names

    for filename, mangled in g.static_names.iteritems():
        yield filename
        yield mangled

        _, ext = os.path.splitext(filename)
        if ext in (".css", ".js"):
            yield filename + ".gzip"
            yield mangled + ".gzip"
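
# For illustration, if g.static_names maps each source name to its mangled
# (content-hashed) name, e.g. {"reddit.css": "reddit.Hg5J2qw.css"} (hash
# value hypothetical), the generator above yields all four reachable
# variants of that entry:
#
#     reddit.css
#     reddit.Hg5J2qw.css
#     reddit.css.gzip
#     reddit.Hg5J2qw.css.gzip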


def clean_static_files(config_file):
    """Delete mature, unreferenced files from the static files bucket."""
    bucket, config = read_static_file_config(config_file)
    ignored_prefixes = tuple(p.strip() for p in
                             config["ignored_prefixes"].split(","))

    # mark: collect every file name that is still referenced, either because
    # it is committed to a repo or because the build process generates it.
    plugins = PluginLoader()
    reachable_files = itertools.chain(
        get_source_static_files(plugins),
        get_generated_static_files(),
    )

    # sweep: anything old enough on S3 that is not reachable gets deleted,
    # unless it falls under one of the configured ignored prefixes.
    condemned_files = get_mature_files_on_s3(bucket)
    for reachable_file in reachable_files:
        if reachable_file in condemned_files:
            del condemned_files[reachable_file]

    for filename, key in condemned_files.iteritems():
        # str.startswith accepts a tuple and matches any of its prefixes.
        if not filename.startswith(ignored_prefixes):
            key.delete()
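
# A hedged sketch of a command-line entry point: the file as shown ends with
# clean_static_files() above, so this guard (and the sys import it would
# need) is an illustrative assumption rather than part of the original
# script:
#
#     if __name__ == "__main__":
#         import sys
#         clean_static_files(sys.argv[1])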