新闻动态

   

您现在的位置是:首页 » 新闻动态 » 网站技术


html_decode


人气: 1947  日期: 2012-04-24  作者: 大向网络  [打印本文]

<?PHP


/**
 * Decode a fixed set of HTML entities into plain UTF-8 text.
 *
 * Handles &amp; and double-quote entities, the numeric and named forms of the
 * common accented Latin letters, and a handful of typographic entities that
 * are flattened to ASCII: curly quotes become straight quotes, guillemets
 * become an apostrophe, &hellip; becomes "...", &ndash; and &shy; become "-".
 * A raw UTF-8 no-break space (0xC2 0xA0) and &nbsp; both become a plain space.
 *
 * The substitution is done in a single pass with strtr(), so text produced
 * by one replacement is never scanned again: "&amp;quot;" yields "&quot;",
 * not a double quote.
 *
 * Accented letters are written as UTF-8 byte escapes so that the mapping
 * does not depend on the encoding this source file is saved in.
 *
 * @param string $text Text possibly containing HTML entities.
 * @return string The text with the known entities replaced.
 */
function html_decode($text) {
    static $entityMap = array(
        "&amp;"    => "&",
        "&quot;"   => "\"",
        // "&034;" is kept for backward compatibility; it is presumably a typo
        // for the numeric reference "&#034;", which is decoded as well.
        "&034;"    => '"',
        "&#034;"   => '"',

        // a: grave, acute, circumflex, diaeresis
        "&#224;"   => "\xC3\xA0",
        "&#225;"   => "\xC3\xA1",
        "&#226;"   => "\xC3\xA2",
        "&#228;"   => "\xC3\xA4",

        // e: grave, acute, circumflex, diaeresis
        "&#232;"   => "\xC3\xA8",
        "&#233;"   => "\xC3\xA9",
        "&#234;"   => "\xC3\xAA",
        "&#235;"   => "\xC3\xAB",

        // i: grave, acute, circumflex, diaeresis
        "&#236;"   => "\xC3\xAC",
        "&#237;"   => "\xC3\xAD",
        "&#238;"   => "\xC3\xAE",
        "&#239;"   => "\xC3\xAF",

        // o: grave, acute, circumflex, diaeresis
        "&#242;"   => "\xC3\xB2",
        "&#243;"   => "\xC3\xB3",
        "&#244;"   => "\xC3\xB4",
        "&#246;"   => "\xC3\xB6",

        // u: grave, acute, circumflex, diaeresis
        "&#249;"   => "\xC3\xB9",
        "&#250;"   => "\xC3\xBA",
        "&#251;"   => "\xC3\xBB",
        "&#252;"   => "\xC3\xBC",

        // named diaeresis forms
        "&auml;"   => "\xC3\xA4",
        "&euml;"   => "\xC3\xAB",
        "&iuml;"   => "\xC3\xAF",
        "&uuml;"   => "\xC3\xBC",
        "&ouml;"   => "\xC3\xB6",

        // named grave forms
        "&agrave;" => "\xC3\xA0",
        "&egrave;" => "\xC3\xA8",
        "&igrave;" => "\xC3\xAC",
        "&ograve;" => "\xC3\xB2",
        "&ugrave;" => "\xC3\xB9",

        // named acute forms
        "&aacute;" => "\xC3\xA1",
        "&eacute;" => "\xC3\xA9",
        "&iacute;" => "\xC3\xAD",
        "&oacute;" => "\xC3\xB3",
        "&uacute;" => "\xC3\xBA",

        // named circumflex forms
        "&acirc;"  => "\xC3\xA2",
        "&ecirc;"  => "\xC3\xAA",
        "&icirc;"  => "\xC3\xAE",
        "&ocirc;"  => "\xC3\xB4",
        "&ucirc;"  => "\xC3\xBB",

        "&aring;"  => "\xC3\xA5",
        "&ccedil;" => "\xC3\xA7",

        "&acute;"  => "'",
        "&nbsp;"   => " ",
        "&ndash;"  => "-",

        // special quotes to normal
        "&#8217;"  => "'",
        "&#8216;"  => "'",
        "&rsquo;"  => "'",
        "&raquo;"  => "'",
        "&lsquo;"  => "'",
        "&laquo;"  => "'",

        "&#8220;"  => '"',
        "&#8221;"  => '"',
        "&#8222;"  => '"',
        "&bdquo;"  => '"',
        "&ldquo;"  => '"',
        "&rdquo;"  => '"',
        "&hellip;" => '...',
        "&shy;"    => '-',

        // raw UTF-8 encoded no-break space
        "\xC2\xA0" => " ",
    );

    return strtr($text, $entityMap);
}


/**
 * Reduce an HTML document to plain text.
 *
 * The document is first passed through html_decode(), then a fixed list of
 * regular-expression rules is applied in order:
 *   1. <style> and <script> elements are removed together with their content;
 *   2. HTML comments are removed;
 *   3. tags whose name starts with p, br, td or h (and closing tags starting
 *      with p, td or h) are replaced by two carriage returns;
 *   4. every remaining tag is removed.
 *
 * The order matters: the paragraph-break rules must run before the final
 * catch-all rule strips whatever tags are left.
 *
 * @param string $document HTML markup.
 * @return string The text content of the document.
 */
function html2txt($document) {
    $paragraphBreak = "\r\r";

    // Each rule is a (pattern, replacement) pair, applied sequentially.
    $rules = array(
        // strip styles
        array('@<style[^>]*?>.*?</style>@si', ''),
        // strip scripts
        array('@<script[^>]*?>.*?</script>@si', ''),
        // strip comments
        array('@<!--*?.*?-->@si', ''),
        // replace P and BR and TD and H with end of sentence
        array('@<p[^>]*?.*?>@si', $paragraphBreak),
        array('@<\/p[^>]*?.*?>@si', $paragraphBreak),
        array('@<br[^>]*?.*?>@si', $paragraphBreak),
        array('@<td[^>]*?.*?>@si', $paragraphBreak),
        array('@<\/td[^>]*?.*?>@si', $paragraphBreak),
        array('@<h[^>]*?.*?>@si', $paragraphBreak),
        array('@<\/h[^>]*?.*?>@si', $paragraphBreak),
        // remove remaining tags
        array('@<[\/\!]*?[^<>]*?>@si', ''),
    );

    $plain = html_decode($document);
    foreach ($rules as $rule) {
        $plain = preg_replace($rule[0], $rule[1], $plain);
    }

    return $plain;
}


// main routine
// Reads the HTML file named by the first command-line argument, converts it
// with html2txt() and writes the result to "<input name>.tx".

// $argv[1] does not exist when the script is started without arguments, so
// test for it instead of reading it blindly (avoids an undefined-index notice).
$fn = isset($argv[1]) ? $argv[1] : '';
if ($fn === '') {
    echo "\r\nsupply html file name to convert to txt";
} else {
    $fc = file_get_contents($fn);
    // file_get_contents() returns false on failure. Compare strictly so that
    // an empty input file (or one containing just "0") is still converted
    // rather than being reported as missing.
    if ($fc === false) {
        echo "\r\nfile not found";
    } else {
        $fnn = $fn . ".tx";
        $fo = fopen($fnn, "w");
        if ($fo) {
            fwrite($fo, html2txt($fc));
            fclose($fo);
        } else {
            echo "\r\nerror creating output file";
        }
    }
}
?>

  上一篇:asp的split拆分函数使用
  下一篇:html5自带的表单验证代码 做验证再也不用那么复杂了
相关文章(Tags:html_decode,UBB,html)