This repository was archived by the owner on Apr 21, 2018. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathfunctions.php
131 lines (101 loc) · 4.05 KB
/
functions.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
<?php
/**
* Main function file, providing many of the transformations used to clean up Word's HTML export
* @author Benjamin J. Balter
* @package word-to-html
*/
/**
 * Parses Word-style footnotes (via save-as HTML) into a cleaner, Bootstrap-compatible format.
 *
 * Adapted from https://github.com/benbalter/Convert-Microsoft-Word-Footnotes-to-WordPress-Simple-Footnotes/blob/master/parse-footnotes.php
 * which parsed into WordPress's Simple Footnotes format.
 *
 * In-text markers of the form <a name="_ftnrefN" title="">[N]</a> become
 * <sup><a class="footnote" id="fnrefN" href="#fnN" title="(footnote text)">N</a></sup>,
 * and footnote paragraphs of the form <p><a name="_ftnN" title="">[N]</a> text</p> become
 * <li id="fnN"><p>text <a href="#fnrefN">↩</a></p></li>.
 *
 * NOTE: This version assumes the document has already been cleaned up via kses;
 * otherwise the regexes won't match because of the extra attributes.
 *
 * @param string $content the ugly-footnoted HTML
 * @return string the pretty HTML (returned unchanged apart from stripslashes() when
 *                there is nothing to convert)
 */
function bb_parse_footnotes( $content ) {
    $content = stripslashes( $content );

    $ref_pattern  = '#\<a name\="_ftnref([0-9]+)" title\=""\>\[([0-9]+)\]\</a\>#';
    $note_pattern = '#\<p\>\<a name\="_ftn([0-9]+)" title\=""\>\[([0-9]+)\]\</a\>[ ]?(.*?)\</p\>#is';

    // Index the footnote bodies by footnote number so that each reference gets the
    // text of *its own* footnote, even if some footnote paragraphs failed to match.
    $notes = array();
    if ( preg_match_all( $note_pattern, $content, $matches, PREG_SET_ORDER ) ) {
        foreach ( $matches as $match ) {
            if ( $match[1] === $match[2] && ! isset( $notes[ $match[2] ] ) ) {
                $notes[ $match[2] ] = $match[3];
            }
        }
    }

    // Callbacks are used instead of replacement strings so that "$1", "\1", "$100"
    // etc. inside the footnote text are never interpreted as back-references.
    $converted = preg_replace_callback(
        $ref_pattern,
        function ( $match ) use ( $notes ) {
            // Only markers whose anchor number and displayed number agree are converted.
            if ( $match[1] !== $match[2] ) {
                return $match[0];
            }

            $number = $match[2];
            $title  = isset( $notes[ $number ] ) ? strip_tags( $notes[ $number ] ) : '';
            // The title lives inside a double-quoted attribute; a raw quote would end it early.
            $title  = str_replace( '"', '&quot;', $title );

            return '<sup><a class="footnote" id="fnref' . $number . '" href="#fn' . $number . '" title="' . $title . '">' . $number . '</a></sup>';
        },
        $content
    );
    if ( null !== $converted ) {
        // A null result means PCRE failed; keep the previous content rather than lose it.
        $content = $converted;
    }

    $converted = preg_replace_callback(
        $note_pattern,
        function ( $match ) {
            if ( $match[1] !== $match[2] ) {
                return $match[0];
            }

            return '<li id="fn' . $match[2] . '"><p>' . $match[3] . ' <a href="#fnref' . $match[2] . '">↩</a></p></li>';
        },
        $content
    );
    if ( null !== $converted ) {
        $content = $converted;
    }

    return $content;
}
/**
 * Removes every HTML comment (<!-- ... -->) from the content.
 *
 * Depending on the export options, Word hides its CSS inside comment tags;
 * stripping the comments gets rid of it. Matching is non-greedy and spans
 * line breaks, so multi-line comments are removed as a whole.
 *
 * @param string $content HTML that may contain comments
 * @return string the HTML without comments
 */
function bb_strip_comments( $content ) {
    $comment_pattern  = "/<!--.*-->/Uis";
    $without_comments = preg_replace( $comment_pattern, "", $content );

    return $without_comments;
}
/**
 * Removes paragraphs that have nothing in them.
 *
 * After cleanup we're left with extra P tags holding no content. A paragraph counts
 * as empty when it contains only whitespace and/or non-breaking spaces (either the
 * &nbsp; entity or the raw UTF-8 character). A single newline directly after the
 * removed paragraph is removed with it, so no blank line is left behind.
 *
 * @param string $content HTML possibly containing empty <p> elements
 * @return string the HTML without empty paragraphs
 */
function bb_remove_empty_ps( $content ) {
    // \xC2\xA0 is the UTF-8 byte sequence of U+00A0 (no-break space).
    $empty_paragraph = '#<p>(?:\s|&nbsp;|\xC2\xA0)*+</p>\n?#';

    $cleaned = preg_replace( $empty_paragraph, '', $content );

    // preg_replace() yields null on a PCRE error; never trade the document for null.
    return null === $cleaned ? $content : $cleaned;
}
/**
 * This isn't 1999: swaps presentational <b> tags for <strong> tags.
 *
 * Only the exact lowercase, attribute-free tags <b> and </b> are replaced.
 *
 * @param string $content HTML using <b>
 * @return string HTML using <strong>
 */
function bb_b_to_strong( $content ) {
    $bold_tags   = array( '<b>', '</b>' );
    $strong_tags = array( '<strong>', '</strong>' );

    return str_replace( $bold_tags, $strong_tags, $content );
}
/**
 * Swaps <i> tags for <em> tags... semantic markup is fun!
 *
 * Only the exact lowercase, attribute-free tags <i> and </i> are replaced.
 *
 * @param string $content HTML using <i>
 * @return string HTML using <em>
 */
function bb_i_to_em( $content ) {
    $italic_tags   = array( '<i>', '</i>' );
    $emphasis_tags = array( '<em>', '</em>' );

    return str_replace( $italic_tags, $emphasis_tags, $content );
}
/**
 * Normalizes line endings to UNIX format and squeezes blank lines.
 *
 * Windows (CRLF) and old Mac (CR) line endings become LF, then any run of two
 * or more consecutive newlines is reduced to exactly two (one blank line).
 *
 * @param string $content text with mixed line endings
 * @return string text using only LF line endings
 */
function bb_normalize_line_endings( $content ) {
    // "\r\n" is listed first so a CRLF pair becomes one newline, not two.
    $unix_only = str_replace( array( "\r\n", "\r" ), "\n", $content );

    return preg_replace( "/\n{2,}/", "\n\n", $unix_only );
}
/**
 * Why, oh why, would you hard wrap the text?
 *
 * Removes the hard line wrap while trying to preserve real paragraph breaks:
 * every pair of newlines is parked behind a placeholder, each remaining single
 * newline is turned into a space, and the placeholders are turned back into
 * double newlines.
 *
 * @TODO is there a better way to do this?
 *
 * @param string $content hard-wrapped text
 * @return string text with only paragraph breaks left
 */
function bb_remove_hard_word_wrap( $content ) {
    $marker = "{{ DOUBLE LINE BREAK }}";

    // preg_replace() applies the patterns one after another, in array order.
    $steps = array(
        "#\n\n#"      => $marker, // protect paragraph breaks
        "#\n#"        => ' ',     // unwrap hard-wrapped lines
        "#{$marker}#" => "\n\n",  // restore paragraph breaks
    );

    return preg_replace( array_keys( $steps ), array_values( $steps ), $content );
}
/**
 * Ensures that text encoding is UTF-8.
 *
 * The source encoding is detected strictly against the configured mbstring
 * detection order. When none of those encodings fits, the bytes are treated
 * as Windows-1252 instead of handing a failed detection to the converter.
 *
 * @param string $content text in an unknown encoding
 * @return string the text converted to UTF-8
 */
function bb_normalize_encoding( $content ) {
    // Strict mode: only accept an encoding in which the whole string is valid.
    $source_encoding = mb_detect_encoding( $content, mb_detect_order(), true );

    if ( false === $source_encoding ) {
        // NOTE(review): Windows-1252 is assumed as the fallback because that is what
        // Western-locale Word typically writes — confirm for other locales.
        $source_encoding = 'Windows-1252';
    }

    return mb_convert_encoding( $content, 'UTF-8', $source_encoding );
}
/**
 * Removes all consecutive spaces so that all spaces are single spaces, and actual
 * spaces rather than non-breaking ones.
 *
 * Non-breaking spaces (the &nbsp; entity and the raw UTF-8 character) are turned
 * into plain spaces first, so that any runs they create are collapsed as well.
 *
 * @param string $content text with runs of spaces and/or non-breaking spaces
 *                        (assumed to be UTF-8 for the raw no-break space)
 * @return string text in which every run of spaces is a single space
 */
function bb_remove_extra_spaces( $content ) {
    // "\xC2\xA0" is the UTF-8 byte sequence of U+00A0 (no-break space).
    $content = str_replace( array( '&nbsp;', "\xC2\xA0" ), ' ', $content );

    // Written with escapes so the two-space needle survives whitespace-collapsing tools;
    // a one-space needle here would loop forever.
    $double_space = "\x20\x20";
    while ( strpos( $content, $double_space ) !== false ) {
        $content = str_replace( $double_space, ' ', $content );
    }

    return $content;
}