-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathindex.php
216 lines (203 loc) · 7.53 KB
/
index.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
<?php
require_once( 'header.php' );
// Report fatal errors only; warnings and notices are deliberately hidden.
error_reporting( E_ERROR | E_CORE_ERROR | E_COMPILE_ERROR ); // E_ALL|
// NOTE(review): fatal errors are rendered into the page; confirm this is
// wanted outside of development.
ini_set( 'display_errors', 'On' );

// Accept only a plain string: an array value (e.g. "lang[]=en") would
// otherwise reach strtolower()/htmlspecialchars() and fail.
$lang = isset( $_REQUEST['lang'] ) && is_string( $_REQUEST['lang'] ) ? $_REQUEST['lang'] : '';

// Every run of lowercase letters and hyphens is one language code, so any
// other character acts as a separator ("en,fa" and "en fa" both work).
preg_match_all( '/[a-z-]+/', strtolower( $lang ), $matches );
$languages = $matches[0];

// Number of rows to fetch: default 50, always kept within 1..500 so that a
// zero or negative value can never end up in the SQL LIMIT clause.
if ( isset( $_REQUEST['limit'] ) && $_REQUEST['limit'] ) {
    $limit = max( 1, min( (int)$_REQUEST['limit'], 500 ) );
} else {
    $limit = 50;
}

// A search is only run when at least one language code and at least one of
// the three change-type checkboxes were submitted.
$hasFormData = $languages !== [] && (
    isset( $_REQUEST['description'] ) ||
    isset( $_REQUEST['labels'] ) ||
    isset( $_REQUEST['sitelinks'] )
);
?>
<style>
/* Row colours for the CSS classes that the results table assigns to a row
   depending on its ORES damaging score. */
tr.probably-damaging {background-color: #fef7e6;}
tr.very-likely-damaging {background-color: #fee7e6;}
a {color: #36c;}
div.checkbox {margin: 1em; margin-left: 0;}
</style>
<script>
// Once the document is ready, enable sorting on every table marked "sortable".
// NOTE(review): jQuery and the tablesort plugin are presumably loaded by
// header.php - confirm.
$(function() {
$('table.sortable').tablesort();
});
</script>
<div style="padding: 3em;">
<form action="<?php echo basename( __FILE__ ); ?>">
<label for="lang">Language code(s)</label><br>
<div class="ui corner labeled input">
<input style="margin-bottom: 0.5em" type="text" name="lang" id="lang" required placeholder="en,fa,nl-informal" <?php
// Re-populate the field with the submitted value (HTML-escaped); the value
// attribute is omitted entirely when nothing was submitted.
if ( $lang !== '' ) {
echo 'value="' . htmlspecialchars( $lang ) . '"';
}
?>>
<div class="ui corner label">
<i class="asterisk icon"></i>
</div></div><br>
<label for="limit">Limit</label><br>
<div class="ui labeled input">
<input style="margin-bottom: 0.5em" id="limit" name="limit" type="number" min="1" max="500" required value="<?php echo htmlspecialchars( $limit ); ?>">
</div>
<br>
<?php
/**
 * Echoes one checkbox wrapped in a `div.ui.checkbox`, followed by its label.
 *
 * The values are written into the markup as-is (no HTML escaping), so callers
 * must pass trusted strings.
 *
 * @param string $name        Used for both the `name` and the `id` attribute.
 * @param string $description Label text shown next to the box.
 * @param bool   $checked     Whether the box is rendered pre-ticked.
 */
function checkbox( $name, $description, $checked ) {
    $checkedAttribute = '';
    if ( $checked ) {
        $checkedAttribute = 'checked';
    }

    // One array entry per output line; joined with "\n" and no trailing newline.
    $markupLines = [
        '<div class="ui checkbox">',
        "<input type=\"checkbox\" name=\"{$name}\" id=\"{$name}\" {$checkedAttribute}>",
        "<label for=\"{$name}\">{$description}</label>",
        '</div>',
    ];
    echo implode( "\n", $markupLines );
}
// Render the three change-type filters. Each box reflects whether its field
// was submitted; "description" is additionally ticked by default whenever no
// complete search has been submitted.
$checkboxLabels = [
    'description' => 'Changes in descriptions',
    'labels' => 'Changes in labels and aliases',
    'sitelinks' => 'Sitelink removals',
];
foreach ( $checkboxLabels as $fieldName => $fieldLabel ) {
    $isTicked = isset( $_REQUEST[$fieldName] );
    if ( $fieldName === 'description' && !$hasFormData ) {
        $isTicked = true;
    }
    checkbox( $fieldName, $fieldLabel, $isTicked );
}
?>
<br>
<button type="submit" class="ui primary button">Search</button>
</form>
<?php
/**
 * Builds the wikidata.org URL to link for the author of an edit.
 *
 * Names that look like an IPv4 or IPv6 address link to
 * Special:Contributions/<address>; every other name links to the User: page,
 * with spaces replaced by underscores.
 *
 * @param string $username Author name as stored in the database.
 * @return string Absolute URL with the name percent-encoded.
 */
function userlink( $username ) {
    // The alternation is wrapped in a group so that ^ and $ anchor BOTH
    // branches. Ungrouped, "^" applied only to the IPv4 branch and "$" only to
    // the IPv6 branch, so names such as "1.2.3.4 fan" or "Bob:a" were
    // misclassified as anonymous.
    $anonymousPattern = '/^(?:\d+\.\d+\.\d+\.\d+|[0-9a-f]+(?::[0-9a-f]*)+)$/i';
    if ( preg_match( $anonymousPattern, $username ) ) {
        $prefix = 'Special:Contributions/';
    } else {
        $prefix = 'User:';
        $username = strtr( $username, ' ', '_' );
    }

    // Percent-encode the name so that characters such as "?", "#" or quotes
    // cannot cut the URL short or break the surrounding markup. Colons are
    // restored to keep IPv6 addresses readable.
    $encodedName = str_replace( '%3A', ':', rawurlencode( $username ) );

    return "https://www.wikidata.org/wiki/{$prefix}{$encodedName}";
}
/**
 * Builds one SQL `comment_text REGEXP '...'` condition for a set of languages.
 *
 * The pattern is: the given prefix, three arbitrary characters (presumably the
 * ":N|" count separator of the summary - confirm), one of the language codes,
 * an optional wiki-family suffix, and a trailing space.
 *
 * The arguments are concatenated into the SQL text unescaped, so both must
 * already be safe to embed in a single-quoted SQL string.
 *
 * @param string   $regexpPrefix Regular expression matching the action part.
 * @param string[] $languages    Language codes to match.
 * @return string SQL condition fragment.
 */
function languagesCommentRegexp( $regexpPrefix, $languages ) {
    $alternatives = [];
    foreach ( $languages as $code ) {
        // be-x-old for terms, be_x_old(wiki...) for sitelinks
        $alternatives[] = str_replace( '-', '[-_]', $code );
    }

    $languageGroup = '(' . implode( '|', $alternatives ) . ')';
    $siteSuffixGroup = '(|wiki|wikisource|wikiquote|wikinews|wikibooks|wiktionary|wikiversity|wikivoyage)';

    return "comment_text REGEXP '{$regexpPrefix}...{$languageGroup}{$siteSuffixGroup} '";
}
if ( $hasFormData ) {
    // Keep the row count within 1..500: the value is written straight into
    // the LIMIT clause, and zero or a negative number would be invalid there.
    $limit = max( 1, min( (int)$limit, 500 ) );

    // Database credentials come from the ini-style file one directory up.
    $dbmycnf = parse_ini_file( "../replica.my.cnf" );
    $dbuser = $dbmycnf['user'];
    $dbpass = $dbmycnf['password'];
    unset( $dbmycnf );
    $dbhost = "wikidatawiki.web.db.svc.wikimedia.cloud";
    $dbname = "wikidatawiki_p";
    $db = new PDO( 'mysql:host=' . $dbhost . ';dbname=' . $dbname . ';charset=utf8', $dbuser, $dbpass );

    // One REGEXP condition per selected change type, OR-ed together.
    // $hasFormData guarantees that at least one of these is selected.
    $conditions = [];
    if ( isset( $_REQUEST['description'] ) ) {
        $conditions[] = languagesCommentRegexp( 'wbsetdescription-(add|set|remove)', $languages );
    }
    if ( isset( $_REQUEST['labels'] ) ) {
        $conditions[] = languagesCommentRegexp( 'wbsetlabel-(add|set|remove)', $languages );
        $conditions[] = languagesCommentRegexp( 'wbsetaliases-(add|set|remove|update)', $languages );
    }
    if ( isset( $_REQUEST['sitelinks'] ) ) {
        $conditions[] = languagesCommentRegexp( 'wbsetsitelink-remove', $languages );
    }
    $where = '(' . implode( ' OR ', $conditions ) . ')';

    // Newest matching recent changes with rc_patrolled = 0.
    $sql = "SELECT rc_this_oldid, rc_title, actor_name, comment_text " .
        "FROM recentchanges " .
        "JOIN comment_recentchanges ON rc_comment_id = comment_id " .
        "JOIN actor_recentchanges ON rc_actor = actor_id " .
        "WHERE {$where} AND rc_patrolled = 0 " .
        "ORDER BY rc_id desc LIMIT {$limit};";
    $result = $db->query( $sql )->fetchAll();

    // Ask the wbformatentities API for a display form of every distinct
    // entity, 50 ids per request. A failed or malformed response is skipped,
    // so the affected rows fall back to showing the plain entity id.
    $entities = array_values( array_unique( array_column( $result, 'rc_title' ) ) );
    $formattedEntitiesDictionary = [];
    foreach ( array_chunk( $entities, 50 ) as $entityChunk ) {
        $response = file_get_contents( 'https://www.wikidata.org/w/api.php?' . http_build_query( [
            'action' => 'wbformatentities',
            'ids' => implode( '|', $entityChunk ),
            'format' => 'json',
            'formatversion' => '2',
        ] ) );
        $decoded = $response === false ? null : json_decode( $response, true );
        if ( is_array( $decoded )
            && isset( $decoded['wbformatentities'] )
            && is_array( $decoded['wbformatentities'] )
        ) {
            $formattedEntitiesDictionary = array_merge(
                $formattedEntitiesDictionary,
                $decoded['wbformatentities']
            );
        }
    }

    // Look up the "damaging" probability for every listed revision. The ids
    // are cast to int because they are interpolated into the IN (...) list.
    $oresDictionary = [];
    $revisionIds = array_map( 'intval', array_column( $result, 'rc_this_oldid' ) );
    if ( $revisionIds !== [] ) {
        // TODO: Cache
        $damagingModels = $db->query( "select oresm_id from ores_model where oresm_name = 'damaging' and oresm_is_current = 1" )->fetchAll();
        // Without a current "damaging" model the score column simply stays empty.
        if ( isset( $damagingModels[0]['oresm_id'] ) ) {
            $damagingId = (int)$damagingModels[0]['oresm_id'];
            $oresSql = "select oresc_rev, oresc_probability from ores_classification where oresc_model = {$damagingId} AND oresc_rev in (" . implode( ", ", $revisionIds ) . ")";
            $oresResult = $db->query( $oresSql )->fetchAll();
            foreach ( $oresResult as $row ) {
                $oresDictionary[$row['oresc_rev']] = $row['oresc_probability'];
            }
        }
    }

    echo '<table class="ui sortable celled table"><thead><tr><th>Edit ID</th><th>Entity ID</th><th>Username</th><th>Entity title</th><th>Edit summary</th><th>ORES damaging score</th></tr></thead><tbody>';
    echo "\n";
    foreach ( $result as $row ) {
        $id = (int)$row['rc_this_oldid'];
        // Database values are HTML-escaped before being written into the page.
        $title = htmlspecialchars( $row['rc_title'], ENT_QUOTES, 'UTF-8' );
        $username = htmlspecialchars( $row['actor_name'], ENT_QUOTES, 'UTF-8' );
        $userlink = htmlspecialchars( userlink( $row['actor_name'] ), ENT_QUOTES, 'UTF-8' );
        // parseComment() returns markup that is already escaped.
        $summary = parseComment( $row['comment_text'] );
        // The API value is emitted as-is (presumably ready-made HTML - confirm
        // it is safe to trust); without one, show the escaped entity id.
        $formatted = isset( $formattedEntitiesDictionary[$row['rc_title']] )
            ? $formattedEntitiesDictionary[$row['rc_title']]
            : $title;
        // Not every revision has a score; such rows keep the neutral class.
        $damagingScore = isset( $oresDictionary[$id] ) ? $oresDictionary[$id] : null;

        $class = 'okay';
        if ( $damagingScore !== null ) {
            if ( $damagingScore > 0.983 ) {
                $class = 'very-likely-damaging';
            } elseif ( $damagingScore > 0.72 ) {
                $class = 'probably-damaging';
            }
        }
        $scoreCell = $damagingScore === null
            ? ''
            : htmlspecialchars( (string)$damagingScore, ENT_QUOTES, 'UTF-8' );

        echo "<tr class=\"{$class}\">"
            . "<td><a href=\"https://www.wikidata.org/wiki/Special:Diff/{$id}\" target=\"_blank\">{$id}</a></td>"
            . "<td><a href=\"https://www.wikidata.org/wiki/{$title}\" target=\"_blank\">{$title}</a></td>"
            . "<td><a href=\"{$userlink}\" target=\"_blank\">{$username}</a></td>"
            . "<td>{$formatted}</td><td>{$summary}</td><td>{$scoreCell}</td></tr>\n";
    }
    echo "</tbody></table>\n";
} else {
    // Shown whenever no complete search was submitted (no language code or no
    // change type selected).
    echo '<div class="ui negative message">
<div class="header">
No query
</div>
<p>You need to enable at least one case</p></div>';
}
/**
 * Turns an automatic edit summary into a short, readable HTML snippet.
 *
 * Summaries of the form "/* <type>-<action>:<count>|<language> *​/ <text>"
 * with a known type and action become e.g.
 * "Added 2 aliases in <i>en</i>: <b>text</b>", wrapped in a <span> whose
 * title attribute carries the full original summary. Anything else is
 * returned unchanged apart from HTML escaping.
 *
 * @param string $comment Raw edit summary.
 * @return string HTML-safe markup.
 */
function parseComment( $comment ) {
    $original = htmlspecialchars( $comment, ENT_QUOTES | ENT_HTML401, 'UTF-8' );
    $actions = [
        "add" => "Added",
        "set" => "Changed",
        "remove" => "Removed",
        "update" => "Changed",
    ];
    // Each type maps to [ singular, plural ].
    $types = [
        "wbsetdescription" => [ "description", "description" ],
        "wbsetaliases" => [ "alias", "aliases" ],
        "wbsetlabel" => [ "label", "labels" ],
        "wbsetsitelink" => [ "sitelink", "sitelinks" ],
    ];

    // Groups: 1 = type, 2 = action, 3 = count, 4 = language/site, 5 = free text.
    if ( !preg_match( '%/\*\s*([a-z]+)-([a-z]+):(\d+)\|([\w-]+)\s*\*/(.+)?%', trim( $comment ), $match ) ) {
        return $original;
    }
    // An unknown type or action cannot be described; show the summary as-is.
    if ( !isset( $actions[$match[2]] ) || !isset( $types[$match[1]] ) ) {
        return $original;
    }

    $action = $actions[$match[2]];
    $isPlural = $match[3] > 1;
    $type = $types[$match[1]][$isPlural ? 1 : 0];
    // The count is only spelled out when more than one item was affected.
    $count = $isPlural ? " {$match[3]}" : '';

    // Group 5 is absent from $match when nothing follows the closing "*/",
    // so it must be checked before use.
    $text = isset( $match[5] ) ? trim( $match[5] ) : '';
    $comment = htmlspecialchars( $text );

    // $match[4] is limited to word characters and "-" by the pattern, so it
    // needs no escaping.
    return "<span title=\"$original\">$action{$count} $type in <i>{$match[4]}</i>: <b>$comment</b></span>";
}
?>
</div>
</body>
</html>