Changeset View
Changeset View
Standalone View
Standalone View
src/parser/html/PhutilHTMLParser.php
Context not available. | |||||
$segments = array(); | $segments = array(); | ||||
$in_tag = false; | $in_tag = false; | ||||
for ($ii = 0; $ii < strlen($corpus); $ii++) { | for ($ii = 0; $ii < @strlen($corpus); $ii++) { | ||||
$c = $corpus[$ii]; | $c = $corpus[$ii]; | ||||
if ($in_tag && ($c === '>')) { | if ($in_tag && ($c === '>')) { | ||||
Context not available. | |||||
// "tag" strings and a list of "non-tag" strings. | // "tag" strings and a list of "non-tag" strings. | ||||
$parts = array(); | $parts = array(); | ||||
$corpus_length = strlen($corpus); | $corpus_length = @strlen($corpus); | ||||
foreach ($segments as $segment) { | foreach ($segments as $segment) { | ||||
$tag = $segment['tag']; | $tag = $segment['tag']; | ||||
$pos = $segment['pos']; | $pos = $segment['pos']; | ||||
Context not available. | |||||
} | } | ||||
if (($slice_pos < $corpus_length) && ($slice_len > 0)) { | if (($slice_pos < $corpus_length) && ($slice_len > 0)) { | ||||
$content = substr($corpus, $slice_pos, $slice_len); | $content = @substr($corpus, $slice_pos, $slice_len); | ||||
} else { | } else { | ||||
$content = ''; | $content = ''; | ||||
} | } | ||||
Context not available. | |||||
$content = $raw_content; | $content = $raw_content; | ||||
$content = trim($content); | $content = trim($content); | ||||
$content_len = strlen($content); | $content_len = @strlen($content); | ||||
// If the tag content begins with "/", like "</td>", strip the slash | // If the tag content begins with "/", like "</td>", strip the slash | ||||
// off and mark this as a closing tag. | // off and mark this as a closing tag. | ||||
$is_close = false; | $is_close = false; | ||||
if ($content_len > 0 && $content[0] === '/') { | if ($content_len > 0 && $content[0] === '/') { | ||||
$is_close = true; | $is_close = true; | ||||
$content = substr($content, 1); | $content = @substr($content, 1); | ||||
$content = trim($content); | $content = trim($content); | ||||
$content_len = strlen($content); | $content_len = @strlen($content); | ||||
} | } | ||||
// If the tag content ends with "/", like "<td />", strip the slash off | // If the tag content ends with "/", like "<td />", strip the slash off | ||||
Context not available. | |||||
$self_close = false; | $self_close = false; | ||||
if ($content_len > 0 && $content[$content_len - 1] === '/') { | if ($content_len > 0 && $content[$content_len - 1] === '/') { | ||||
$self_close = true; | $self_close = true; | ||||
$content = substr($content, 0, $content_len - 1); | $content = @substr($content, 0, $content_len - 1); | ||||
$content = trim($content); | $content = trim($content); | ||||
$content_len = strlen($content); | $content_len = @strlen($content); | ||||
} | } | ||||
// If this tag is both a closing tag and a self-closing tag, it is | // If this tag is both a closing tag and a self-closing tag, it is | ||||
Context not available. | |||||
} | } | ||||
// If there's no tag name, this tag is not valid. Treat it as content. | // If there's no tag name, this tag is not valid. Treat it as content. | ||||
if (!strlen($tag_name)) { | if (!@strlen($tag_name)) { | ||||
return null; | return null; | ||||
} | } | ||||
// If this is a closing tag with attributes, it's not valid. Treat it | // If this is a closing tag with attributes, it's not valid. Treat it | ||||
// as content. | // as content. | ||||
if ($is_close && strlen($attributes)) { | if ($is_close && @strlen($attributes)) { | ||||
return null; | return null; | ||||
} | } | ||||
Context not available. | |||||
return null; | return null; | ||||
} | } | ||||
if (strlen($attributes)) { | if (@strlen($attributes)) { | ||||
$attribute_map = $this->parseAttributes($attributes); | $attribute_map = $this->parseAttributes($attributes); | ||||
// If the attributes can't be parsed, treat the tag as content. | // If the attributes can't be parsed, treat the tag as content. | ||||
if ($attribute_map === null) { | if ($attribute_map === null) { | ||||
Context not available. | |||||
); | ); | ||||
$map = array(); | $map = array(); | ||||
$len = strlen($attributes); | $len = @strlen($attributes); | ||||
$key_pos = null; | $key_pos = null; | ||||
for ($ii = 0; $ii < $len; $ii++) { | for ($ii = 0; $ii < $len; $ii++) { | ||||
$c = $attributes[$ii]; | $c = $attributes[$ii]; | ||||
Context not available. | |||||
// Finding a "=" or a space character ends the attribute name. | // Finding a "=" or a space character ends the attribute name. | ||||
// Save it, then figure out what to do with the parser state. | // Save it, then figure out what to do with the parser state. | ||||
if ($c === '=' || $is_space) { | if ($c === '=' || $is_space) { | ||||
$name_value = substr($attributes, $name_pos, $ii - $name_pos); | $name_value = @substr($attributes, $name_pos, $ii - $name_pos); | ||||
$name_value = phutil_utf8_strtolower($name_value); | $name_value = phutil_utf8_strtolower($name_value); | ||||
// If this attribute already exists, the tag is invalid. This means | // If this attribute already exists, the tag is invalid. This means | ||||
Context not available. | |||||
// We found the closing quote, so pull out the actual value. | // We found the closing quote, so pull out the actual value. | ||||
if ($c === '"') { | if ($c === '"') { | ||||
$attr_value = substr($attributes, $value_pos, $ii - $value_pos); | $attr_value = @substr($attributes, $value_pos, $ii - $value_pos); | ||||
$map[$name_value] = $attr_value; | $map[$name_value] = $attr_value; | ||||
$state = 'key'; | $state = 'key'; | ||||
Context not available. | |||||
// We've found some whitespace, so pull out the actual value. | // We've found some whitespace, so pull out the actual value. | ||||
if ($is_space) { | if ($is_space) { | ||||
$attr_value = substr($attributes, $value_pos, $ii - $value_pos); | $attr_value = @substr($attributes, $value_pos, $ii - $value_pos); | ||||
$map[$name_value] = $attr_value; | $map[$name_value] = $attr_value; | ||||
$state = 'key'; | $state = 'key'; | ||||
Context not available. | |||||
case 'name': | case 'name': | ||||
// We were looking for the end of an attribute name. Treat whatever | // We were looking for the end of an attribute name. Treat whatever | ||||
// we found as a name. | // we found as a name. | ||||
$name_value = substr($attributes, $name_pos, $len - $name_pos); | $name_value = @substr($attributes, $name_pos, $len - $name_pos); | ||||
if (isset($map[$name_value])) { | if (isset($map[$name_value])) { | ||||
return null; | return null; | ||||
Context not available. | |||||
// the delimiter or closing quote. Treat whatever we found as a quoted | // the delimiter or closing quote. Treat whatever we found as a quoted | ||||
// value. | // value. | ||||
$attr_value = substr($attributes, $value_pos, $len - $name_pos); | $attr_value = @substr($attributes, $value_pos, $len - $name_pos); | ||||
$map[$name_value] = $attr_value; | $map[$name_value] = $attr_value; | ||||
break; | break; | ||||
Context not available. |
Content licensed under Creative Commons Attribution-ShareAlike 4.0 (CC-BY-SA) unless otherwise noted; code licensed under Apache 2.0 or other open source licenses. · CC BY-SA 4.0 · Apache 2.0