本文共 2177 字,大约阅读时间需要 7 分钟。
LOCAL_DB_HOST.':3306',
    'user' => LOCAL_DB_USER,
    'password' => LOCAL_DB_PWD,
    'db' => LOCAL_DB_NAME,
);
// NOTE(review): the lines above are the tail of an array literal whose
// beginning (along with the opening PHP tag and any includes) lies before the
// visible part of this file. It is kept exactly as found; presumably it is the
// $config array handed to DBConfig below -- confirm against the full script.

// Script purpose: for every category in $type_list, download the Baidu
// top-list page and store one row per ranked entry in web_baidu_gametop50.
// A category is skipped when rows dated today (or later) already exist.

$is_debug = true;

// The first command-line argument switches logging; it is passed through as
// the log level. Guarded with isset() so that running the script without
// arguments does not raise an undefined-index notice (null is passed instead,
// which is the value the unguarded read produced).
$log = new CommonLog(array(
    'log_level' => isset($_SERVER['argv'][1]) ? $_SERVER['argv'][1] : null,
));

// Connect to the database instance.
$db = new DBConfig($config);

$today = date('Ymd');

$type_list = array(
    // 'game' => "http://top.baidu.com/buzz/game.html",
    'webgame' => "http://top.baidu.com/buzz/mmogame.html",
    'rpg' => "http://top.baidu.com/buzz/magic_rpg.html",
);

foreach ($type_list as $type => $url) {
    // Collect the game ranking list for this category.
    // $type only ever comes from the hard-coded keys of $type_list above, so
    // interpolating it into the SQL text does not expose external input.
    $max_date = $db->query_single("select max(data_date) from web_baidu_gametop50 where type = '$type'");

    // Already gathered today: nothing to do for this category.
    if ($max_date && !($max_date < $today)) {
        $log->debug("page: (" . $url . ")has gathered before");
        continue;
    }

    $log->debug("start at page: " . $url);

    // Read the page content; on failure, retry up to 9 times in a row.
    $page = false;
    for ($attempt = 0; $attempt < 9 && $page === false; $attempt++) {
        $page = @file_get_contents($url);
    }

    // Every attempt failed: report it and move on instead of feeding `false`
    // into the encoding conversion and the HTML parser.
    if ($page === false) {
        $log->debug("failed to fetch page after 9 attempts: " . $url);
        continue;
    }

    // Rewrite the declared charset first: phpQuery cannot handle special
    // characters while the document still declares gb2312.
    $page = preg_replace('/gb2312/i', 'utf-8', $page);

    // Convert the actual bytes from gb2312 to utf-8, then parse.
    $doc = phpQuery::newDocumentHTML(
        mb_convert_encoding($page, 'utf-8', 'gb2312')
    );

    // Walk every table row of the list except the header row (class "th").
    foreach ($doc->find("div.list > table > tbody > tr")->not(".th") as $tr) {
        $tr = pq($tr);

        $data = array();
        $data['index_id'] = $tr->find('> th:nth-child(1)')->text();
        $data['key_name'] = $tr->find('> td:nth-child(2)')->text();
        // Today's searches.
        $data['search_num'] = $tr->find('> td:nth-child(5)')->text();
        // Last seven days.
        $data['count_num'] = $tr->find('> td:nth-child(6)')->text();
        // $data['online_day'] = $tr->find('> td:nth-child(5)')->text();
        // $data['avg_num'] = $tr->find('> td:nth-child(6)')->text();
        $data['online_day'] = 0;
        $data['avg_num'] = 0;
        $data['data_date'] = $today;
        $data['type'] = $type;

        $db->insert_array("web_baidu_gametop50", $data);
    }
}
?>
dd
转载地址:http://rbdhb.baihongyu.com/