用php抓取google关键词排名

更新时间:2023-04-07 05:56:05 阅读：评论：0

说下思路：利用 php 的 curl 函数存储 cookie。google 搜索页面是无法用 file_get_contents 打开的，必须要完全模拟浏览器请求才行；百度就不同了，直接用 file_get_contents 抓取页面，然后用正则处理下就行了，这里就不列举百度了。

<?php

// Declare the response as UTF-8 HTML so the Chinese output below renders correctly.
// (The original used typographic quotes and misspelled "charset" as "chart".)
header('Content-Type: text/html; charset=utf-8');

/**
 * Find where a site ranks in Google's results for a keyword and echo the result as HTML.
 *
 * One result page is fetched per call through cURL. Every `<h3 class="r">` result
 * heading on that page is checked for $url_s; on a hit the overall rank, the page
 * number and the position on the page are printed. When the page has no hit the
 * function calls itself for the next page, and it stops with a "not found" message
 * once $page exceeds 10.
 *
 * @param string $url_s   Text searched for inside each result heading's markup
 *                        (normally the domain being tracked).
 * @param string $keyword Search phrase; it is URL-encoded before being sent.
 * @param int    $page    1-based result page to fetch.
 *
 * @return void All output is echoed.
 */
function ggarch($url_s, $keyword, $page = 1)
{
    // The keyword comes from user input, so escape it before putting it into HTML.
    $safe_keyword = htmlspecialchars($keyword, ENT_QUOTES, 'UTF-8');

    // Only the first ten result pages are inspected.
    if ($page > 10) {
        echo '关键字' . $safe_keyword . '10页之内没有该网站排名' . '<br>';
        echo '<hr>';
        return;
    }

    $enkeyword = urlencode($keyword);
    $page_num = ($page - 1) * 10; // value of the "start" query parameter: ten results per page
    $rsstate = false;             // becomes true once $url_s is found on this page

    // Pick one of the aliased interfaces eth0:1..eth0:4 at random to spread requests
    // over several source IPs. NOTE(review): this assumes those aliases exist on the
    // host; when they do not, the request fails and the page is treated as empty.
    $interface = 'eth0:' . rand(1, 4);
    $cookie_file = dirname(__FILE__) . '/temp/google.txt'; // cookie store shared between requests
    $url = "https://www.google.com/search?q=$enkeyword&hl=en&prmd=imvns&ei=jpnjtvlfi8hlggexwbrl&start=$page_num&sa=n";

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    //curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']); // forward the visitor's own browser string instead
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 GTB5');
    curl_setopt($ch, CURLOPT_INTERFACE, $interface);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    // COOKIEFILE replays the cookies saved by earlier requests; COOKIEJAR saves the
    // new ones on close. The original only set the jar, so cookies were never sent.
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
    $contents = curl_exec($ch);
    curl_close($ch);

    // A failed transfer returns false; treat it as a page without results.
    if ($contents === false) {
        $contents = '';
    }

    // Isolate the results container.
    // NOTE(review): the scraped source shows a <p> wrapper, which may be a mangled
    // <div>; both are accepted here. Confirm against the markup actually returned.
    $match = '!<(?:div|p)\s*id="search">(.*)</(?:div|p)>\s+<\!--z-->!';
    preg_match_all($match, $contents, $line);

    // foreach replaces the list()/each() idiom, which no longer exists in PHP 8.
    foreach ($line[0] as $v) {
        preg_match_all('!<h3\s+class="r"><a[^>]+>(.*?)</a>!', $v, $title);
        $num = count($title[1]);

        for ($i = 0; $i < $num; $i++) {
            if (strpos($title[0][$i], $url_s) !== false) {
                $rsstate = true;
                $j = $i + 1;            // 1-based position on this page
                $sum = $j + $page_num;  // overall rank across pages

                //echo $contents;
                echo '关键字' . $safe_keyword . '<br>' . '排名：' . '<font color="red" size="20" >' . $sum . '</font>' . '####' . '第' . '<font color="#00ffff" size="18" >' . $page . '</font>' . ' 页' . '第' . '<font color="#8000ff" size="15" >' . $j . '</font>' . '名' . $title[0][$i] . '<br>';
                echo "<a href='" . htmlspecialchars($url, ENT_QUOTES, 'UTF-8') . "'>" . '点击搜索结果' . '</a>' . '<br>';
                echo '<hr>';
                break;
            }
        }
    }

    unset($contents);

    // Nothing found on this page: continue with the next one.
    if ($rsstate === false) {
        ggarch($url_s, $keyword, $page + 1);
    }
}

// Form handler: when the form has been submitted, look up the rank of the given
// site for every keyword entered and finally print the elapsed time.
if (!empty($_POST['submit'])) {
    $start = microtime(true);

    // Superglobal names are case-sensitive ($_POST, not $_post). Missing fields
    // fall back to an empty string so they fail the check below.
    $more_key = isset($_POST['textarea']) ? trim($_POST['textarea']) : '';
    $url_s = isset($_POST['url']) ? trim($_POST['url']) : '';

    if ($more_key !== '' && $url_s !== '') {
        // Keywords may be separated by "|" or by line breaks. Splitting on \r as
        // well as \n keeps a stray carriage return out of keywords posted with
        // CRLF line endings.
        $exkey = preg_split('/[\r\n|]+/', $more_key, -1, PREG_SPLIT_NO_EMPTY);

        // The original built a "www."-prefixed copy of the URL here, but it called
        // ltrim() with a character list instead of a prefix and never used the
        // result. The lookup has always been done with $url_s as entered, so that
        // dead code is dropped.
        foreach ($exkey as $keyword) {
            $keyword = trim($keyword);
            if ($keyword === '') {
                continue;
            }
            ggarch($url_s, $keyword);
        }

        echo '<hr>';
        echo '程序运行时间: ';
        echo microtime(true) - $start;
        //die();
    }
}
?>

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">

<head>

</head>

<body>

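<form action="" method="post">
<textarea name="textarea" rows="20" cols="40">
格式例如：keyword1|keyword2|keyword3
或者: keyword1
keyword2
keyword3
</textarea>
<span>url地址：</span><input type="text" name="url">
<input type="submit" name="submit" value="查询">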

</form>

</body>