Here is the code:
Code:
<?php
/**
 * Create the shared cURL handle and store it in the global $ch.
 *
 * The handle is configured to return the response body as a string
 * (no headers), not to follow redirects, not to verify the TLS peer,
 * and to send a fixed Firefox 2 user-agent string.
 */
function InitCurl()
{
    global $ch;

    $ch = curl_init();

    // Applied in the same order as the individual curl_setopt() calls would be.
    curl_setopt_array($ch, array(
        CURLOPT_HEADER         => 0,
        CURLOPT_FOLLOWLOCATION => 0,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.3) Gecko/20070309 Firefox/2.0.0.3",
    ));
}
/**
 * Fetch a URL through the shared global cURL handle $ch.
 *
 * @param string $url Address to request.
 * @return mixed Whatever curl_exec() returns for the handle.
 */
function CurlGetContent($url)
{
    global $ch;

    // Point the shared handle at the requested address, then run the transfer.
    curl_setopt($ch, CURLOPT_URL, $url);
    $response = curl_exec($ch);

    return $response;
}
/**
 * Print one article as a two-column HTML table and push it to the client.
 *
 * The left column lists the article's pictures (each with its caption), the
 * right column holds the title and body. Picture paths, captions, title and
 * body are written out exactly as given, without any escaping.
 *
 * @param array $news Array with 'Title', 'Body' and 'PicSet' entries; 'PicSet'
 *                    is rendered only when it is an array of
 *                    array('Pic' => ..., 'PicText' => ...) items.
 * @return void
 */
function DisplayNewsPage($news)
{
    global $base_url;
    global $today;

    // Picture paths are resolved against the dated directory of the site.
    $b = $base_url . $today . '/';

    print '<table><tr><td width=200 VALIGN="top">';
    if (isset($news['PicSet']) && is_array($news['PicSet'])) {
        foreach ($news['PicSet'] as $v) {
            print "<img src=\"" . $b . $v['Pic'] . "\" onmouseover=\"popImg(true, this);\" onmouseout=\"popImg(false);\" width=180><BR>\r\n";
            print $v['PicText'] . "<BR><BR>\r\n";
        }
    }
    print '</td><td VALIGN="top">';
    print '<H1>' . $news['Title'] . '</H1><BR><BR>';
    print $news['Body'];
    print '</td></tr>';
    print '</table>' . "\r\n";

    // ob_flush() raises a notice when no output buffer is active, so only
    // call it when there is a buffer to flush.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}
/**
 * Fetch a picture page ("<url>?Mode=1") and extract the main photo and caption.
 *
 * @param string $url Page path relative to the dated directory, without the
 *                    "?Mode=1" suffix.
 * @return array|string array('Pic' => image src, 'PicText' => caption or "")
 *                      when a main photo is found; "" otherwise.
 */
function GetPic($url)
{
    global $base_url;
    global $today;

    $page = CurlGetContent($base_url . $today . '/' . $url . '?Mode=1');

    // curl_exec() yields false on a failed transfer; there is nothing to parse then.
    if (!is_string($page)) {
        return "";
    }

    if (!preg_match('|mainphotolink"><img src="([^"]+)"|', $page, $photo)) {
        return "";
    }

    // The U modifier was dropped on purpose: under U a trailing "?" makes the
    // quantifier greedy again, so "(.*?)" would run to the last </td> on the
    // line instead of stopping at the first one.
    $pictext = "";
    if (preg_match('|<td align="center" class="caption">(.*?)</td>|', $page, $caption)) {
        $pictext = $caption[1];
    }

    return array(
        'Pic' => $photo[1],
        'PicText' => $pictext,
    );
}
/**
 * Fetch an article page and extract its title, body and picture set.
 *
 * - Title: content of the first <h1>, with "<br>" replaced by a space.
 * - Body: concatenated content of every <div ... id="newscontentN"> element.
 * - PicSet: one GetPic() result per "...?Mode=1" link found on the page,
 *   keyed by the link (without the "?Mode=1" suffix).
 *
 * @param string $url Article path relative to the dated directory.
 * @return array array('Title' => string, 'Body' => string, 'PicSet' => array|string);
 *               'PicSet' is "" when no picture was found.
 */
function GetNewsContent($url)
{
    global $base_url;
    global $today;

    $page = CurlGetContent($base_url . $today . '/' . $url);
    // curl_exec() yields false on a failed transfer; parse an empty page instead.
    if (!is_string($page)) {
        $page = "";
    }

    $title = "";
    $body = "";
    // Collected in a real array: writing $p[$key] = array(...) on a variable
    // initialised to "" stopped working in PHP 7.1.
    $pics = array();

    // No U modifier here: combined with "(.*?)" it would make the capture greedy.
    if (preg_match('|<h1>(.*?)</h1>|', $page, $heading)) {
        $title = str_replace('<br>', ' ', $heading[1]);
    }

    if (preg_match_all('|<div class="[^"]*" id="newscontent[\d]*">(.*?)</div>|is', $page, $sections, PREG_SET_ORDER)) {
        foreach ($sections as $v) {
            $body .= $v[1];
        }
    }

    if (preg_match_all("|<a href=\"([^\"]+)\?Mode=1\">|U", $page, $links, PREG_SET_ORDER)) {
        foreach ($links as $v) {
            $ar = GetPic($v[1]);
            if (is_array($ar)) {
                $pics[$v[1]] = $ar;
            }
        }
    }

    return array(
        'Title' => $title,
        'Body' => $body,
        // Keep the historical "" for "no pictures" so is_array() checks in callers still work.
        'PicSet' => count($pics) > 0 ? $pics : "",
    );
}
/**
 * Run ProcessPage() for every entry of the global $title_pages_set whose
 * value does not contain the substring "main".
 *
 * Prints "ERR" and does nothing else when $title_pages_set is not an array.
 * ProcessPage() is not defined in the visible part of this file.
 *
 * @return void
 */
function ProcessAllTitles()
{
    global $title_pages_set;

    if (is_array($title_pages_set)) {
        foreach ($title_pages_set as $pageKey => $pageValue) {
            // Entries mentioning "main" are skipped.
            if (strstr($pageValue, "main")) {
                continue;
            }
            ProcessPage($pageKey, $pageValue);
        }
        return;
    }

    echo 'ERR';
}
/**
 * Extract the section links from the front page's "sublink[0] = '...'" assignment.
 *
 * Only anchors whose href matches "index.htm" are kept.
 *
 * @param string $ming Front-page HTML.
 * @return array|string List of array('Title' => link text, 'Link' => href);
 *                      "1" when the sublink[0] assignment is not found,
 *                      "2" when it contains no anchors,
 *                      "" when no anchor points at an index.htm page.
 */
function GetTitle($ming)
{
    // NOTE(review): under the U modifier the "?" makes "(.*?)" greedy, so this
    // captures up to the LAST quote on the line. Left as written because the
    // intended behaviour cannot be confirmed from this file.
    if (!preg_match("|sublink\[0\] = \'(.*?)\'|U", $ming, $block)) {
        return "1";
    }
    if (!preg_match_all("|<a href=\"([^\"]+)\"[^>]*?>([^<]+)</a>|U", $block[1], $anchors, PREG_SET_ORDER)) {
        return "2";
    }

    // Built as a real array: "$titles = ''; $titles[] = ..." is a fatal error
    // since PHP 7.1.
    $titles = array();
    foreach ($anchors as $v) {
        if (!preg_match("|index.htm|U", $v[1])) {
            continue;
        }
        $titles[] = array(
            'Title' => $v[2],
            'Link' => $v[1],
        );
    }

    // Callers detect "nothing found" with is_array(), so keep returning "".
    return count($titles) > 0 ? $titles : "";
}
/**
 * Fetch a section page and collect the headline links inside <h1> and <li> elements.
 *
 * For each <h1>/<li> element the first anchor is used; its text becomes the
 * title (trimmed, "<br>" replaced by a space) and its href the link.
 *
 * @param string $url Section page path relative to the dated directory.
 * @return array|string|null Map of href => array('Title' => ..., 'Link' => href);
 *                           "" when elements were found but none contained a link;
 *                           null when the page has no <h1>/<li> element at all.
 */
function GetHeadlines($url)
{
    global $base_url;
    global $today;

    $page = CurlGetContent($base_url . $today . '/' . $url);
    // curl_exec() yields false on a failed transfer; parse an empty page instead.
    if (!is_string($page)) {
        $page = "";
    }

    // The U modifier was dropped from both patterns: together with "(.*?)" it
    // made the captures greedy, merging several elements that share a line.
    if (!preg_match_all('#<(h1|li)>(.*?)</\1>#', $page, $items, PREG_SET_ORDER)) {
        return;
    }

    // Built as a real array: indexing into a variable initialised to ""
    // stopped working in PHP 7.1.
    $heads = array();
    foreach ($items as $v) {
        if (!preg_match('|<a href="([^"]+)">(.*?)</a>|', $v[2], $anchor)) {
            continue;
        }
        $link = $anchor[1];
        $heads[$link] = array(
            'Title' => str_replace('<br>', ' ', trim($anchor[2])),
            'Link' => $link,
        );
    }

    // Callers detect "nothing found" with is_array(), so keep returning "".
    return count($heads) > 0 ? $heads : "";
}
/**
 * Print one section as an HTML form: a checkbox plus a direct link per headline.
 *
 * Submitting the form (or following a link) calls this script again with
 * ShowNews=1 and one "<link>=<link>" pair per selected headline.
 *
 * @param array  $heads Headlines, each array('Title' => ..., 'Link' => ...).
 *                      Both values are written out as given, without escaping.
 * @param string $name  Section label shown in the left column.
 * @return void
 */
function DisplayHeader($heads, $name)
{
    // PHP_SELF is taken from the request URL and can carry attacker-supplied
    // path info, so it is escaped and only placed inside quoted attributes.
    $self = htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES);

    print '<FORM action="' . $self . '" method=POST target=_blank>';
    print '<table><tr>';
    print '<td width=100 valign=top>' . $name . "<BR>\n";
    print '<INPUT type=SUBMIT value=Submit><INPUT type=RESET>';
    print '<input type=BUTTON onclick="selectAll(this.form);" value=SelectAll>';
    print '<INPUT type=HIDDEN name=ShowNews value=1>';
    print "<BR>\r\n</td><td valign=top>";
    foreach ($heads as $v) {
        print '<INPUT type=CHECKBOX name="' . $v['Link'] . '" value="' . $v['Link'] . '">';
        print '<a href="' . $self . '?ShowNews=1&' . $v['Link'] . '=' . $v['Link'] . '" target=_blank>' . $v['Title'] . "</a><BR>\n";
    }
    print '</td></tr></table>';
    print '</FORM>' . "\r\n";
}
/**
 * Emit the inline <script> block used by the generated pages.
 *
 * It defines three client-side functions:
 * - get(eid): shorthand for document.getElementById.
 * - popImg(open, iref): show (open = true) a copy of image iref in an
 *   absolutely positioned "popImg" div to the right of the image, or hide
 *   that div (open = false).
 * - selectAll(formObj): tick every checkbox of the given form.
 *
 * @return void
 */
function PrintJavaScript()
{
    // Everything between the closing and reopening PHP tags is sent to the browser as-is.
?>
<script type='text/javascript'>
function get(eid)
{
    var d = document;
    var r = d.getElementById(eid);
    return r;
}
function popImg(open, iref)
{
    var pop = get('popImg');
    if (open)
    {
        var top = (iref.offsetParent.offsetParent.offsetTop + iref.offsetTop) + 'px';
        var curleft = 0;
        var obj = iref;
        do {
            curleft += obj.offsetLeft;
        } while (obj = obj.offsetParent);
        var left = (curleft + iref.offsetWidth) + 'px';
        var img = '<img src="' + iref.src + '" />';
        var d = document;
        if (null == pop)
        {
            pop = d.createElement('DIV');
            pop.id = 'popImg';
            pop.style.position = 'absolute';
            d.body.appendChild(pop);
        }
        pop.innerHTML = img;
        pop.style.top = top;
        pop.style.left = left;
        pop.style.display = 'block';
    }
    else if (pop)
    {
        // The popup div only exists after the first mouseover.
        pop.style.display = 'none';
    }
}
function selectAll(formObj)
{
    for (var i = 0; i < formObj.length; i++)
    {
        var fldObj = formObj.elements[i];
        if (fldObj.type == 'checkbox')
        {
            fldObj.checked = true;
        }
    }
}
</script>
<?php
}
/**
 * Read the dated directory out of the front page's <base href> tag.
 *
 * @param string $ming Front-page HTML.
 * @return string The digits following "http://news.mingpao.com/" in the
 *                <base href="..."> tag, or "" when no such tag is present.
 */
function GetToday($ming)
{
    // Declared as in the original; none of these is read or written below.
    global $today, $base_url, $main_url;

    $found = preg_match('|<base href="http://news.mingpao.com/(\d+)/">|U', $ming, $baseTag);

    return $found ? $baseTag[1] : "";
}
/**
 * Initialise the script-wide globals.
 *
 * Sets up the shared cURL handle, the HTML page header (with inline CSS) and
 * footer, the site base URL and front-page URL, and finally fetches the front
 * page to determine $today through GetToday().
 *
 * @return void
 */
function PrepareGlobals()
{
    global $HTTP_HEADER, $HTTP_FOOTER, $today, $base_url, $main_url;

    InitCurl();

    // Header pieces are joined without any separator.
    $headerParts = array(
        '<html><head><meta http-equiv="Content-Type" content="text/html; charset=Big5-HKSCS"><title>My MingPaoNews</title>',
        '<style type="text/css"> ',
        'a:link { color: #0000FF; text-decoration: none; } ',
        'a:active { color: #000088; text-decoration: underline; } ',
        'a:visited { color: #000088; text-decoration: none; } ',
        'a:hover { color: #0055FF; text-decoration: underline; } ',
        'table {width: 100% ; border-width:1px; border-collapse: collapse; border-color:#003333; border-style:dashed} ',
        'td {padding: 3px;} ',
        '</style>',
        '</head><body>' . "\r\n",
    );
    $HTTP_HEADER = implode('', $headerParts);
    $HTTP_FOOTER = "\r\n" . '</body></html>' . "\r\n";

    $base_url = "http://news.mingpao.com/";
    $main_url = $base_url . 'index.htm';

    // The front page carries the dated directory in its <base href> tag.
    $frontPage = CurlGetContent($main_url);
    $today = GetToday($frontPage);
}
/**
 * Render the article view: header, script block, one article per request
 * parameter, footer.
 *
 * Every request parameter other than "ShowNews" is treated as an article
 * path; its value is passed to GetNewsContent() and the result displayed.
 *
 * @return void
 */
function DoShowNews()
{
    global $HTTP_HEADER;
    global $HTTP_FOOTER;

    print $HTTP_HEADER;
    PrintJavaScript();

    foreach ($_REQUEST as $k => $v) {
        // Strict comparison: with "==" an integer key 0 equals "ShowNews"
        // before PHP 8 and would be skipped by mistake.
        if ($k === "ShowNews") {
            continue;
        }
        // Array-valued parameters (e.g. "a[]=1") and empty values cannot name
        // an article page; skip them rather than fetch a bogus URL.
        if (!is_string($v) || $v === '') {
            continue;
        }
        $news = GetNewsContent($v);
        if (is_array($news)) {
            DisplayNewsPage($news);
        }
    }

    print $HTTP_FOOTER;
}
/**
 * Render the index view: header, script block, one headline form per
 * section found on the front page, footer.
 *
 * Prints "ERR Titles" in place of the forms when GetTitle() does not return
 * an array.
 *
 * @return void
 */
function DoIndex()
{
    global $HTTP_HEADER, $HTTP_FOOTER, $main_url;

    print $HTTP_HEADER;
    PrintJavaScript();

    $frontPage = CurlGetContent($main_url);
    $sections = GetTitle($frontPage);

    if (is_array($sections)) {
        foreach ($sections as $section) {
            $headlines = GetHeadlines($section['Link']);
            // Sections without any headline are left out.
            if (!is_array($headlines)) {
                continue;
            }
            DisplayHeader($headlines, $section['Title']);
        }
    } else {
        print "ERR Titles";
    }

    print $HTTP_FOOTER;
}
// Entry point: set up the globals, then show the selected articles when a
// "ShowNews" request parameter is present, or the headline index otherwise.
PrepareGlobals();
if (!isset($_REQUEST['ShowNews'])) {
    DoIndex();
} else {
    DoShowNews();
}
?>