php视频网站swf解析插件VideoUrlParser
根据原作者分享的插件 VideoUrlParser 修改,目前支持优酷、土豆、56、搜狐、腾讯视频、YouTube、爱奇艺等视频网站的解析。
如有改进或者建议欢迎留言。
class VideoUrlParser
{
    const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.7';
    const CHECK_URL_VALID = "/(youku\.com|tudou\.com|56\.com|(my\.)?tv\.sohu\.com|v\.qq\.com|youtube\.com|iqiyi\.com)/";

    /**
     * Resolve a video page URL into embeddable player information.
     *
     * A URL that contains ".swf" (case-insensitive) is returned untouched in
     * the 'swf' key. Otherwise the host is matched against CHECK_URL_VALID and
     * the URL is handed to the site-specific parser.
     *
     * @param string $url Video page URL.
     * @static
     * @access public
     * @return array|string|false Array with some of the keys url/swf/title/img,
     *                            the iframe URL string when the site parser
     *                            supplies one (YouTube), or false when the URL
     *                            is unsupported or could not be parsed.
     */
    public static function parse($url = '')
    {
        $lowerurl = strtolower($url);
        if (strpos($lowerurl, '.swf') !== false) {
            return array('swf' => $url);
        }

        if (!preg_match(self::CHECK_URL_VALID, $lowerurl, $matches)) {
            return false;
        }

        switch ($matches[1]) {
            case 'youku.com':
                $data = self::_parseYouku($url);
                break;
            case 'tudou.com':
                $data = self::_parseTudou($url);
                break;
            case '56.com':
                $data = self::_parse56($url);
                break;
            case 'my.tv.sohu.com':
            case 'tv.sohu.com':
            case 'sohu.com':
                $data = self::_parseSohu($url);
                break;
            case 'v.qq.com':
                $data = self::_parseQq($url);
                break;
            case 'youtube.com':
                $data = self::_parseYoutube($url);
                break;
            case 'iqiyi.com':
                $data = self::_parseQiyi($url);
                break;
            default:
                return $url;
        }

        if (!$data) {
            return false;
        }

        // Only some parsers set 'iframe'; test for the key instead of reading it blindly.
        if (!empty($data['iframe'])) {
            return $data['iframe'];
        }

        return $data;
    }

    /**
     * Return the first capture group of $pattern in $subject, or null when the
     * pattern does not match (or $subject is not a string, e.g. a failed fetch).
     *
     * @param string $pattern PCRE pattern with at least one capture group.
     * @param mixed  $subject Text to search.
     * @return string|null
     */
    private static function _firstCapture($pattern, $subject)
    {
        if (!is_string($subject) || !preg_match($pattern, $subject, $found)) {
            return null;
        }

        return isset($found[1]) ? $found[1] : null;
    }

    /**
     * Tencent Video
     * http://v.qq.com/cover/o/o9tab7nuu0q3esh.html?vid=97abu74o4w3_0
     * http://v.qq.com/play/97abu74o4w3.html
     * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o.html
     * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o/9SfqULsrtSb.html
     * http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?_v=20110829&vid=97abu74o4w3&autoplay=1&list=2&showcfg=1&tpid=23&title=%E7%AC%AC%E4%B8%80%E7%8E%B0%E5%9C%BA&adplay=1&cid=o9tab7nuu0q3esh
     *
     * @param string $url Page URL.
     * @return array|false title/img/url/swf, or false when no vid is found in the page.
     */
    private static function _parseQq($url)
    {
        $html = self::_fget($url);
        $vid = self::_firstCapture('/vid: "(\w+)"/i', $html);
        if (!$vid) {
            return false;
        }

        $data = array();
        // title and img stay null when the page does not carry them.
        $data['title'] = self::_firstCapture('/<h1 class="mod_player_title" title="(.+)" id="h1_title">/i', $html);
        $data['img'] = self::_firstCapture('/pic :"(.+)"/i', $html);
        $data['url'] = $url;
        $data['swf'] = 'http://static.video.qq.com/TPout.swf?vid=' . $vid . '&auto=0';

        return $data;
    }

    /**
     * iQiyi
     * http://player.video.qiyi.com/b98d57858cc44afdaa8a38a87d212c46/0/0/lvyou/20120831/d9f00f1388701c05.swf-albumId=198451-tvId=263772-isPurchase=0-cnId=9
     *
     * @param string $url Page URL.
     * @return array|false title/img/url/swf, or false when no video id is found in the page.
     */
    private static function _parseQiyi($url)
    {
        $html = self::_fget($url);
        $videoid = self::_firstCapture('/data-player-videoid="(\w+)"/i', $html);
        if (!$videoid) {
            return false;
        }

        $tvid = self::_firstCapture('/data-player-tvid="(\w+)"/i', $html);
        $albumId = self::_firstCapture('/albumId="(\w+)"/i', $html);

        // NOTE(review): the previous code interpolated an undefined $swf here.
        // Judging by the example player URL above, that segment looks like the
        // page path without its ".html" suffix, so it is derived from $url.
        // Confirm against a live page.
        $path = parse_url($url, PHP_URL_PATH);
        $swf = is_string($path) ? preg_replace('/\.html?$/i', '', trim($path, '/')) : '';

        $data = array();
        $data['title'] = self::_firstCapture('/<h1 class="mod_player_title" title="(.+)" id="h1_title">/i', $html);
        $data['img'] = self::_firstCapture('/pic :"(.+)"/i', $html);
        $data['url'] = $url;
        $data['swf'] = 'http://player.video.qiyi.com/' . $videoid . '/0/0/' . $swf . '.swf-albumId=' . $albumId . '-tvId=' . $tvid . '-isPurchase=0-cnId=9';

        return $data;
    }

    /**
     * Youku
     * http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html
     * http://player.youku.com/player.php/sid/XMjU0NjI2Njg4/v.swf
     *
     * The id is taken from the URL when it has an "id_..." segment; playlist
     * URLs ("v_playlist/") are fetched and the id is read from the page.
     *
     * @param string $url Page URL.
     * @return array|false url/swf, or false when no video id can be determined.
     */
    private static function _parseYouku($url)
    {
        if (!preg_match("#id\_(\w+(?:==)?)#", $url, $matches)) {
            if (!preg_match("#v_playlist\/#", $url)) {
                return false;
            }

            $html = self::_fget($url);
            if (!is_string($html) || !preg_match("#videoId2\s*=\s*\'(\w+)\'#", $html, $matches)) {
                return false;
            }
        }

        return array(
            'url' => $url,
            'swf' => "http://player.youku.com/player.php/sid/{$matches[1]}/v.swf",
        );
    }

    /**
     * Tudou
     * http://www.tudou.com/programs/view/Wtt3FjiDxEE/
     * http://www.tudou.com/v/Wtt3FjiDxEE/v.swf
     *
     * http://www.tudou.com/playlist/p/a65718.html?iid=74909603
     * http://www.tudou.com/l/G5BzgI4lAb8/&iid=74909603/v.swf
     *
     * @param string $url Page URL.
     * @return array|false title/img/url/swf, or false when no icode is found in the page.
     */
    private static function _parseTudou($url)
    {
        $html = self::_fget($url);
        $icode = self::_firstCapture('/icode: \'(\w+)\'/i', $html);
        if (!$icode) {
            return false;
        }

        $data = array();
        $data['title'] = self::_firstCapture('/kw: \'(.+)\'/i', $html);
        $data['img'] = self::_firstCapture('/pic: \'(.+)\'/i', $html);
        $data['url'] = $url;
        $data['swf'] = 'http://www.tudou.com/v/' . $icode . '/';

        return $data;
    }

    /**
     * 56.com
     * http://www.56.com/u73/v_NTkzMDcwNDY.html
     * http://player.56.com/v_NTkzMDcwNDY.swf
     *
     * @param string $url Page URL.
     * @return array|false url/swf, or false when no vid is found in the page.
     */
    private static function _parse56($url)
    {
        $html = self::_fget($url);
        $vid = self::_firstCapture("/vid: '(\w+)'/i", $html);
        if ($vid === null) {
            return false;
        }

        $data = array();
        $data['url'] = $url;
        // The player URL used to carry a fixed id (82948356), so every 56.com
        // link resolved to the same video. Use the vid extracted from the page.
        // NOTE(review): assumes the page's vid is the id this player expects - confirm.
        $data['swf'] = "http://share.vrs.sohu.com/my/v.swf&topBar=1&id={$vid}&autoplay=false&from=page";

        return $data;
    }

    /**
     * Sohu TV http://my.tv.sohu.com/u/vw/5101536
     *
     * Reads the og:title / og:image / og:videosrc meta tags of the page.
     *
     * @param string $url Page URL.
     * @return array|false img/title/url/swf, or false when the page cannot be
     *                     fetched or carries no og:videosrc tag.
     */
    private static function _parseSohu($url)
    {
        $raw = self::_fget($url);
        if (!is_string($raw) || $raw === '') {
            return false;
        }

        // Pages are converted from GBK; fall back to the raw bytes if conversion fails.
        $html = iconv('GBK', 'UTF-8', $raw);
        if ($html === false) {
            $html = $raw;
        }

        if (!preg_match_all('#<meta property="og:(title|image|videosrc)" content="(.+)" />#i', $html, $tags, PREG_SET_ORDER)) {
            return false;
        }

        // Key the values by property name so the result does not depend on
        // the order in which the tags appear in the page.
        $meta = array();
        foreach ($tags as $tag) {
            $name = strtolower($tag[1]);
            if (!isset($meta[$name])) {
                $meta[$name] = $tag[2];
            }
        }

        if (empty($meta['videosrc'])) {
            return false;
        }

        return array(
            'img' => isset($meta['image']) ? $meta['image'] : null,
            'title' => isset($meta['title']) ? $meta['title'] : null,
            'url' => $url,
            'swf' => $meta['videosrc'],
        );
    }

    /**
     * YouTube: builds the thumbnail and embed URLs from the "v" query parameter.
     *
     * @param string $url Page URL.
     * @return array|false img/url/iframe, or false when the URL has no "v" parameter.
     */
    private static function _parseYoutube($url)
    {
        // Accept "v" whether it is the first query parameter or a later one.
        if (!preg_match("#[?&]v=([0-9a-zA-Z_\-]+)#", $url, $matches)) {
            return false;
        }

        $data = array();
        $data['img'] = "http://img.youtube.com/vi/{$matches[1]}/0.jpg";
        $data['url'] = $url;
        $data['iframe'] = "http://www.youtube-nocookie.com/embed/{$matches[1]}";

        return $data;
    }

    /**
     * Fetch a URL (through cURL) and transparently gunzip the body when it is
     * a gzip stream.
     *
     * @param string $url
     * @return string|false Page content, or false when $url is empty or the fetch failed.
     */
    private static function _fget($url = '')
    {
        if (!$url) {
            return false;
        }

        $html = self::_vita_get_url_content($url);
        if (!is_string($html)) {
            return false;
        }

        // Use the decoded text only when the payload really was gzip data.
        $decoded = self::_gzdecode($html);

        return (is_string($decoded) && $decoded !== '') ? $decoded : $html;
    }

    /**
     * Fetch a path from a host on port 80 through a raw socket (fsockopen).
     *
     * Everything read from the socket is returned, so the result starts with
     * the HTTP status line and response headers.
     *
     * @param string $path       Request path.
     * @param string $host       Host name.
     * @param string $user_agent User-Agent header; defaults to USER_AGENT.
     * @return string|false Raw response, or false on bad arguments or socket failure.
     */
    private static function _fsget($path = '/', $host = '', $user_agent = '')
    {
        if (!$path || !$host) {
            return false;
        }

        $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

        // HTTP header lines are CRLF-terminated. "Connection: close" makes the
        // server end the stream after the response so the read loop terminates.
        $request = implode("\r\n", array(
            "GET {$path} HTTP/1.1",
            "Host: {$host}",
            "User-Agent: {$user_agent}",
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: zh-cn,zh;q=0.5',
            'Accept-Charset: GB2312,utf-8;q=0.7,*;q=0.7',
            'Connection: close',
        )) . "\r\n\r\n";

        $fp = @fsockopen($host, 80, $errno, $errstr, 10);
        if (!$fp) {
            return false;
        }

        if (!fwrite($fp, $request)) {
            fclose($fp);
            return false;
        }

        stream_set_timeout($fp, 10);

        $html = '';
        while (!feof($fp)) {
            $chunk = fgets($fp, 1024);
            if ($chunk === false) {
                // Read error or timeout: stop instead of spinning on feof().
                break;
            }
            $html .= $chunk;
        }
        fclose($fp);

        $decoded = self::_gzdecode($html);

        return $decoded ? $decoded : $html;
    }

    /**
     * Fetch a URL through cURL.
     *
     * @param string $url
     * @param string $user_agent User-Agent header; defaults to USER_AGENT.
     * @return string|false Response body, or false when $url is empty, the
     *                      transfer failed, or the body is empty.
     */
    private static function _cget($url = '', $user_agent = '')
    {
        if (!$url) {
            return false;
        }

        $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        // Bound the whole transfer so a stalled server cannot hang the caller.
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);

        $html = curl_exec($ch);
        $failed = curl_errno($ch) !== 0;
        curl_close($ch);

        if ($failed || !is_string($html) || $html === '') {
            return false;
        }

        return $html;
    }

    /**
     * Fetch the content of a URL; thin wrapper around _cget() so every fetch
     * shares the same user agent and timeouts.
     *
     * @param string $url
     * @return string|false Response body, or false on failure.
     */
    private static function _vita_get_url_content($url)
    {
        return self::_cget($url);
    }

    /**
     * Decode a single-member gzip stream (RFC 1952).
     *
     * @param string $data Candidate gzip data.
     * @return string|null|false Decoded data; null when $data is not a gzip
     *                           stream; false when it looks like gzip but the
     *                           header, body or trailer is invalid.
     */
    private static function _gzdecode($data)
    {
        if (!is_string($data)) {
            return null;
        }

        $len = strlen($data);
        // Smallest possible member: 10-byte fixed header plus 8-byte trailer.
        if ($len < 18 || substr($data, 0, 2) !== "\x1f\x8b") {
            return null; // Not GZIP format (See RFC 1952)
        }

        $method = ord($data[2]); // Compression method
        $flags = ord($data[3]);  // Flags

        // Parenthesised on purpose: "!=" binds tighter than "&".
        if (($flags & 0x1f) !== $flags) {
            // Reserved bits are set -- NOT ALLOWED by RFC 1952
            return null;
        }

        // Bytes 4-9 (MTIME, XFL, OS) are not needed for decoding.
        $headerlen = 10;

        if ($flags & 0x04) {
            // FEXTRA: 2-byte little-endian length followed by that many bytes.
            if ($len - $headerlen - 2 < 8) {
                return false; // Invalid format
            }
            $extralen = unpack('v', substr($data, $headerlen, 2));
            $extralen = $extralen[1];
            if ($len - $headerlen - 2 - $extralen < 8) {
                return false; // Invalid format
            }
            $headerlen += 2 + $extralen;
        }

        if ($flags & 0x08) {
            // FNAME: zero-terminated original file name.
            $end = strpos($data, "\0", $headerlen);
            if ($end === false || $len - $end - 1 < 8) {
                return false; // Invalid format
            }
            $headerlen = $end + 1;
        }

        if ($flags & 0x10) {
            // FCOMMENT: zero-terminated comment.
            $end = strpos($data, "\0", $headerlen);
            if ($end === false || $len - $end - 1 < 8) {
                return false; // Invalid format
            }
            $headerlen = $end + 1;
        }

        if ($flags & 0x02) {
            // FHCRC: low 16 bits of the CRC32 of all header bytes before it.
            if ($len - $headerlen - 2 < 8) {
                return false; // Invalid format
            }
            $calccrc = crc32(substr($data, 0, $headerlen)) & 0xffff;
            $headercrc = unpack('v', substr($data, $headerlen, 2));
            if ($headercrc[1] !== $calccrc) {
                return false; // Bad header CRC
            }
            $headerlen += 2;
        }

        // Trailer: CRC32 of the uncompressed data, then its length modulo 2^32.
        $trailer = unpack('Vcrc/Vsize', substr($data, -8));

        $bodylen = $len - $headerlen - 8;
        if ($bodylen < 1) {
            return null;
        }

        if ($method !== 8) {
            // Deflate is the only supported compression method.
            return false;
        }

        $decoded = gzinflate(substr($data, $headerlen, $bodylen));
        if ($decoded === false) {
            return false;
        }

        $sizeOk = (strlen($decoded) & 0xffffffff) === ($trailer['size'] & 0xffffffff);
        $crcOk = (crc32($decoded) & 0xffffffff) === ($trailer['crc'] & 0xffffffff);
        if (!$sizeOk || !$crcOk) {
            // Bad format! Length or CRC doesn't match!
            return false;
        }

        return $decoded;
    }
}


