public static MailFormatHelper::htmlToText($string, $allowed_tags = NULL)
Transforms an HTML string into plain text, preserving its structure.
The output will be suitable for use as 'format=flowed; delsp=yes' text (RFC 3676) and can be passed directly to MailManagerInterface::mail() for sending.
We deliberately use LF rather than CRLF, see MailManagerInterface::mail().
This function provides suitable alternatives for the following tags: <a> <em> <i> <strong> <b> <br> <p> <blockquote> <ul> <ol> <li> <dl> <dt> <dd> <h1> <h2> <h3> <h4> <h5> <h6> <hr>
string $string: The string to be transformed.
array $allowed_tags: (optional) If supplied, a list of tags that will be transformed. If omitted, all supported tags are transformed.
string The transformed string.
public static function htmlToText($string, $allowed_tags = NULL) { // Cache list of supported tags. if (empty(static::$supportedTags)) { static::$supportedTags = array('a', 'em', 'i', 'strong', 'b', 'br', 'p', 'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr'); } // Make sure only supported tags are kept. $allowed_tags = isset($allowed_tags) ? array_intersect(static::$supportedTags, $allowed_tags) : static::$supportedTags; // Make sure tags, entities and attributes are well-formed and properly // nested. $string = Html::normalize(Xss::filter($string, $allowed_tags)); // Apply inline styles. $string = preg_replace('!</?(em|i)((?> +)[^>]*)?>!i', '/', $string); $string = preg_replace('!</?(strong|b)((?> +)[^>]*)?>!i', '*', $string); // Replace inline <a> tags with the text of link and a footnote. // 'See <a href="https://www.drupal.org">the Drupal site</a>' becomes // 'See the Drupal site [1]' with the URL included as a footnote. static::htmlToMailUrls(NULL, TRUE); $pattern = '@(<a[^>]+?href="([^"]*)"[^>]*?>(.+?)</a>)@i'; $string = preg_replace_callback($pattern, 'static::htmlToMailUrls', $string); $urls = static::htmlToMailUrls(); $footnotes = ''; if (count($urls)) { $footnotes .= "\n"; for ($i = 0, $max = count($urls); $i < $max; $i++) { $footnotes .= '[' . ($i + 1) . '] ' . $urls[$i] . "\n"; } } // Split tags from text. $split = preg_split('/<([^>]+?)>/', $string, -1, PREG_SPLIT_DELIM_CAPTURE); // Note: PHP ensures the array consists of alternating delimiters and // literals and begins and ends with a literal (inserting $null as // required). // Odd/even counter (tag or no tag). $tag = FALSE; // Case conversion function. $casing = NULL; $output = ''; // All current indentation string chunks. $indent = array(); // Array of counters for opened lists. $lists = array(); foreach ($split as $value) { // Holds a string ready to be formatted and output. $chunk = NULL; // Process HTML tags (but don't output any literally). if ($tag) { list($tagname) = explode(' ', strtolower($value), 2); switch ($tagname) { // List counters. case 'ul': array_unshift($lists, '*'); break; case 'ol': array_unshift($lists, 1); break; case '/ul': case '/ol': array_shift($lists); // Ensure blank new-line. $chunk = ''; break; // Quotation/list markers, non-fancy headers. case 'blockquote': // Format=flowed indentation cannot be mixed with lists. $indent[] = count($lists) ? ' "' : '>'; break; case 'li': $indent[] = isset($lists[0]) && is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * '; break; case 'dd': $indent[] = ' '; break; case 'h3': $indent[] = '.... '; break; case 'h4': $indent[] = '.. '; break; case '/blockquote': if (count($lists)) { // Append closing quote for inline quotes (immediately). $output = rtrim($output, "> \n") . "\"\n"; // Ensure blank new-line. $chunk = ''; } // Fall-through. case '/li': case '/dd': array_pop($indent); break; case '/h3': case '/h4': array_pop($indent); case '/h5': case '/h6': // Ensure blank new-line. $chunk = ''; break; // Fancy headers. case 'h1': $indent[] = '======== '; $casing = '\Drupal\Component\Utility\Unicode::strtoupper'; break; case 'h2': $indent[] = '-------- '; $casing = '\Drupal\Component\Utility\Unicode::strtoupper'; break; case '/h1': case '/h2': $casing = NULL; // Pad the line with dashes. $output = static::htmlToTextPad($output, ($tagname == '/h1') ? '=' : '-', ' '); array_pop($indent); // Ensure blank new-line. $chunk = ''; break; // Horizontal rulers. case 'hr': // Insert immediately. $output .= static::wrapMail('', implode('', $indent)) . "\n"; $output = static::htmlToTextPad($output, '-'); break; // Paragraphs and definition lists. case '/p': case '/dl': // Ensure blank new-line. $chunk = ''; break; } } // Process blocks of text. else { // Convert inline HTML text to plain text; not removing line-breaks or // white-space, since that breaks newlines when sanitizing plain-text. $value = trim(Html::decodeEntities($value)); if (Unicode::strlen($value)) { $chunk = $value; } } // See if there is something waiting to be output. if (isset($chunk)) { // Apply any necessary case conversion. if (isset($casing)) { $chunk = call_user_func($casing, $chunk); } $line_endings = Settings::get('mail_line_endings', PHP_EOL); // Format it and apply the current indentation. $output .= static::wrapMail($chunk, implode('', $indent)) . $line_endings; // Remove non-quotation markers from indentation. $indent = array_map('\Drupal\Core\Mail\MailFormatHelper::htmlToTextClean', $indent); } $tag = !$tag; } return $output . $footnotes; }
© 2001–2016 by the original authors
Licensed under the GNU General Public License, version 2 and later.
Drupal is a registered trademark of Dries Buytaert.
https://api.drupal.org/api/drupal/core!lib!Drupal!Core!Mail!MailFormatHelper.php/function/MailFormatHelper::htmlToText/8.1.x