龙行博客

走路看风景,经历看人生,岁月留痕迹,人生留轨迹,17的历史,18的豪情,时间的匆忙,人生的风景,放开心胸往前走,成功再远行,放开理想往前走,梦想再行动。
现在位置:首页 > 杂货分享 > 个人随笔 > php爬取豆瓣电影信息

php爬取豆瓣电影信息

龙行    个人随笔    2019-6-21    275    0评论    百度未收录

下面这段脚本其实就是一个简单的 API 接口:先按片名调用豆瓣的搜索建议接口,再抓取影片详情页,最后把解析结果以 JSON 返回,哈哈


<?php
require "./Res.php";
// Stage 1: ask Douban's suggestion endpoint for the title and keep only the
// entries whose title matches exactly. Emits a JSON error and stops when
// nothing usable comes back.
$name = "千与千寻";
// The title travels as a query-string value, so it must be percent-encoded:
// raw non-ASCII bytes, spaces or "&" would otherwise produce a malformed URL.
$url = "https://movie.douban.com/j/subject_suggest?q=" . urlencode($name);
$curl = curl_init(); // start a cURL session; the handle is used again for the detail page
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_HEADER, 0);         // response body only, no headers
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
// NOTE(review): both TLS checks are disabled, so the peer is never
// authenticated. Kept as-is to avoid breaking hosts without a CA bundle;
// enable them wherever a CA bundle is available.
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false); // skip certificate verification
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false); // skip host-name verification
$tmpInfo = curl_exec($curl); // JSON text on success, false on transport failure

// curl_exec() may return false and json_decode() may return null (malformed
// JSON) or a non-list value; normalise all of those to "no suggestions" so the
// loop below never iterates over a non-array.
$tmpInfo = is_string($tmpInfo) ? json_decode($tmpInfo) : null;
if (!is_array($tmpInfo)) {
    $tmpInfo = [];
}

$arrat_res = [];
foreach ($tmpInfo as $v) {
    // Skip entries without a title and compare strictly so numeric-looking
    // titles are not matched through PHP's loose string comparison.
    if (isset($v->title) && $name === $v->title) {
        $arrat_res[] = $v;
    }
}
if (empty($arrat_res)) {
    $data = [
        "code"=>10001,
        "msg"=>"暂无片源信息"
    ];
    echo json_encode($data);die;
}
// Stage 2: download the detail page of the first matching suggestion and
// expose it through a DOMXPath object for the extraction steps that follow.
$url2 = $arrat_res[0]->url;
// Only the URL changes: the options already set on this handle for the first
// request (no header output, return transfer, TLS settings) stay in effect.
curl_setopt($curl, CURLOPT_URL, $url2);
$tmpInfo2 = curl_exec($curl); // HTML of the detail page, false on failure

if (!$tmpInfo2) {
    echo "<br />cURL error number:" .curl_errno($curl);
    echo "<br />cURL error:" . curl_error($curl);
    exit;
}
// Parse the HTML. Real-world markup triggers many libxml warnings; collect
// them internally instead of silencing every error with the @ operator, then
// discard them and restore the previous libxml setting.
$dom = new DOMDocument();
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom->loadHTML($tmpInfo2);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);
// Normalise the document (merges adjacent text nodes).
$dom->normalize();

// XPath helper used for all queries against the parsed page.
$xpath = new DOMXPath($dom);

// Stage 3: pull the basic fields out of the detail page. Every field falls
// back to a plain value when its XPath matches nothing, so later code never
// receives a DOMNodeList where it expects a string or an integer.

// Joins the text of every node in a node list with commas ("" when empty).
$joinNodeValues = function ($nodes) {
    if (!($nodes instanceof DOMNodeList)) {
        return "";
    }
    $values = [];
    foreach ($nodes as $node) {
        $values[] = $node->nodeValue;
    }
    return implode(",", $values);
};
// Returns the text of the first node in a node list, or "" when nothing matched.
$firstNodeValue = function ($nodes) {
    if ($nodes instanceof DOMNodeList && $nodes->length > 0) {
        return $nodes->item(0)->nodeValue;
    }
    return "";
};

// Directors
$directors_res = $joinNodeValues($xpath->evaluate("//*[@id='info']/span[1]/span[2]/a/text()"));

// Title (replaces the search term held in $name so far)
$name = $firstNodeValue($xpath->evaluate("//*[@id='content']/h1/span[1]/text()"));

// Year
$years = $firstNodeValue($xpath->evaluate("//*[@id='content']/h1/span[2]/text()"));

// Poster image URL
$img = $firstNodeValue($xpath->evaluate("//*[@id='mainpic']/a/img/@src"));

// Release status: 1 = page says "尚未上映" (not yet released), 2 = anything else.
// A missing status node now yields 2; previously $is_on was left holding the
// DOMNodeList, which the later `$is_on == 1` check cannot compare meaningfully.
$is_on_text = $firstNodeValue($xpath->evaluate("//*[@id='interest_sectl']/div/div[2]/div/div[2]"));
$is_on = (trim($is_on_text) == "尚未上映") ? 1 : 2;

// Screenwriters
$screenwriters_res = $joinNodeValues($xpath->evaluate("//*[@id='info']/span[2]/span[2]/a/text()"));

// Cast: the text of the matched container span(s), joined with commas.
$actors_res = $joinNodeValues($xpath->query("//*[@id='info']/span[3]/span[2]"));

// Stage 4: genres, release date, runtime, plus the loose text nodes of #info
// (country, language, alternative titles).

// Genres: span texts from position 5 up to the "制片国家/地区:" label.
// getRes() is static, so it is called on the class directly.
$sear_res = getFunction::getRes(5,"制片国家/地区:",$xpath);
$types_res = $sear_res["res"];
// getRes() reports "" as the position when no stop label was found; cast to
// int so the offset arithmetic below never operates on a non-numeric string.
$num = (int) $sear_res["num"];

// Keep every direct text node of #info that contains at least one Chinese
// character (values are stored untrimmed, exactly as found in the page).
$attr = [];
$langs = $xpath->evaluate("//*[@id='info']/text()");
foreach ($langs as $textNode) {
    $lang = $textNode->nodeValue;
    if (preg_match('/[\x{4e00}-\x{9fa5}]/u', $lang)>0) {
        $attr [] = $lang;
    }
}

if ($is_on == 1) {
    // Not yet released: no runtime is read, only the release date.
    $show_res = "";
    $sear2_res = getFunction::getRes($num+4,"又名:",$xpath);
    $time_res = $sear2_res["res"];
    $num = (int) $sear2_res["num"];

}else{
    // Release date(s): spans up to the "片长:" label.
    $sear2_res = getFunction::getRes($num+4,"片长:",$xpath);
    $time_res = $sear2_res["res"];
    $num = (int) $sear2_res["num"];

    // Runtime: spans after that label, up to the "又名:" label.
    $sear3_res = getFunction::getRes($num+1,"又名:",$xpath);
    $show_res = $sear3_res["res"];
    $num = (int) $sear3_res["num"];
}

// Map the collected text nodes onto fields. With exactly four entries the
// third one is appended to the runtime and the fourth is the alternative
// title; otherwise the third is the alternative title. Missing entries fall
// back to "" instead of raising undefined-index notices.
$country = $attr[0] ?? "";
$languages = $attr[1] ?? "";
if (count($attr) == 4) {
    $show_res = $show_res.$attr[2];
    $byname = $attr[3];
}else{
    $byname = $attr[2] ?? "";
}

// Stage 5: resolve the links inside #info and emit the final JSON response.
// With two links, the first goes to "web_url" and the second to "imbd";
// with at most one link, it goes to "imbd" and "web_url" stays empty.
$firstLinkNodes = $xpath->evaluate("//*[@id='info']/a[1]/@href");
$secondLinkNodes = $xpath->evaluate("//*[@id='info']/a[2]/@href");
// Default to "" so neither field can end up holding a DOMNodeList object.
$firstHref = !empty($firstLinkNodes->length) ? $firstLinkNodes->item(0)->nodeValue : "";
if (!empty($secondLinkNodes->length)) {
    $urlim = $secondLinkNodes->item(0)->nodeValue;
    $urls = $firstHref;
}else{
    $urls = "";
    $urlim = $firstHref;
}

$final_res = [
    "all_name" => $name.$years,
    "name" => $name,
    "year" => $years,
    "img" => $img,
    "directors" => $directors_res,
    "screenwriters" => $screenwriters_res,
    "actors" => $actors_res,
    "types" => $types_res,
    "web_url" => $urls,
    "country" => $country,
    "languages" => $languages,
    "ontime" => $time_res,
    "showtime" => $show_res,
    "byname" => $byname,
    "imbd" => $urlim
];

$return = ["code"=>0, "msg"=>"抓取成功", "data"=>$final_res ];
// json_encode()'s second argument is a flag bitmask, not a boolean; the former
// `true` was silently coerced to 1 (JSON_HEX_TAG). Encode with default flags,
// matching the error response emitted earlier in this script.
echo json_encode($return);
Res.php(上面脚本通过 require 引入的辅助类文件):



<?php

/**
 * Helper for reading consecutive <span> children of the element with id "info".
 */
class getFunction{
    /**
     * Collects span texts from position $start until a stop label appears.
     *
     * Span positions $start through 29 are probed one by one. Texts are joined
     * with "," for as long as no stop label has been seen. A span whose text
     * equals $key or "官方网站:" is a stop label: its position is recorded and
     * collecting ends. Scanning still continues to position 29, so a later
     * stop label overwrites the recorded position.
     *
     * @param int      $start first span position to inspect
     * @param string   $key   label text that ends the collection
     * @param DOMXPath $xpath XPath object bound to the parsed page
     * @return array   ["res" => joined texts, "num" => position of the last
     *                 stop label seen, or "" when none was found]
     */
    public static function getRes($start,$key,$xpath){
        $collected = "";
        $stopIndex = "";
        for ($position = $start; $position < 30; $position++) {
            $nodes = $xpath->query("//*[@id='info']/span[" . $position . "]");
            // No span at this position: nothing to inspect.
            if (empty($nodes->length)) {
                continue;
            }
            $text = $nodes->item(0)->nodeValue;
            // Either stop label records its position.
            if ($text == $key || $text == "官方网站:") {
                $stopIndex = $position;
                continue;
            }
            // Once a stop label has been recorded, later texts are ignored.
            if (!empty($stopIndex)) {
                continue;
            }
            // The span at $start begins the result; every later one is appended.
            $collected = ($position != $start) ? $collected . "," . $text : $text;
        }
        return ["res" => $collected, "num" => $stopIndex];
    }
}


评论一下 分享本文 赞助站长

赞助站长X

扫码赞助站长
联系站长
龙行博客
  • 版权申明:此文如未标注转载均为本站原创,自由转载请表明出处《龙行博客》。
  • 本文网址:https://www.liaotaoo.cn/257.html
  • 上篇文章:root被我禁止远程登录了
  • 下篇文章:git pull一直弹出vim编辑器解决日记
  • php7
挤眼 亲亲 咆哮 开心 想想 可怜 糗大了 委屈 哈哈 小声点 右哼哼 左哼哼 疑问 坏笑 赚钱啦 悲伤 耍酷 勾引 厉害 握手 耶 嘻嘻 害羞 鼓掌 馋嘴 抓狂 抱抱 围观 威武 给力
提交评论

清空信息
关闭评论
快捷导航
联系博主
在线壁纸
给我留言
四四五五
音乐欣赏
返回顶部