ComunicAPI/classes/URLanalyzer.php

74 lines
1.7 KiB
PHP
Raw Permalink Normal View History

2018-01-07 15:23:41 +00:00
<?php
/**
* URL analyzer
*
* Retrieve the source code of a web page to extract
* its Open Graph tags
*
* @author Pierre HUBERT
*/
class URLAnalyzer {

    /**
     * Analyze a given URL to extract OpenGraph content
     *
     * The page is downloaded with cURL, then every <meta> tag that carries
     * a quoted property="og:..." attribute is collected.
     *
     * @param string $url The URL to analyze
     * @param int $timeout The connection timeout, in seconds. The whole
     * transfer is additionally capped at 5 seconds, whatever this value is.
     * @return array Open graph tags (property name without its "og:" prefix
     * => content attribute) in case of success or empty array in case of
     * failure
     */
    public static function analyze(string $url, int $timeout = 15) : array {

        //Initialize curl (may fail and return false)
        //NOTE(review): if $url can come from untrusted users, consider
        //restricting the allowed protocols with CURLOPT_PROTOCOLS
        $ch = curl_init($url);
        if($ch === false)
            return array();

        //Set timeouts: $timeout applies to the connection phase only, the
        //complete request is limited to 5 seconds
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        curl_setopt($ch, CURLOPT_TIMEOUT, 5);

        //Get the response as a string instead of printing it
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

        //Perform the request
        $source = curl_exec($ch);
        if(!is_string($source) || $source === "")
            return array();

        //Analyze response
        return self::extractOpenGraphTags($source);
    }

    /**
     * Extract the OpenGraph <meta> tags of an HTML document
     *
     * @param string $source The HTML source code to process
     * @return array The list of tags (property name without its "og:"
     * prefix => content attribute, or an empty string if the tag has no
     * quoted content attribute). If a property appears several times, the
     * last occurrence wins.
     */
    private static function extractOpenGraphTags(string $source) : array {

        $list = array();

        //Get the attributes of all the meta tags ("\s+" also accepts a tab
        //or a line break after the tag name)
        if(!preg_match_all('#<meta\s+(.*?)>#is', $source, $results, PREG_PATTERN_ORDER))
            return $list;

        foreach($results[1] as $entry){

            //Search for the property attribute. Spaces around "=" are
            //tolerated by the pattern itself so that the attributes values
            //are never altered. The value must be quoted (" or ', matched by
            //the \1 back reference) and start with "og:". The lookbehind
            //avoids matching the end of another attribute name.
            if(!preg_match('#(?<![\w-])property\s*=\s*(["\'])og:(.*?)\1#is', $entry, $property))
                continue; //Not a (valid) OpenGraph tag: nothing to store

            //Search for the content attribute (optional)
            $value = "";
            if(preg_match('#(?<![\w-])content\s*=\s*(["\'])(.*?)\1#is', $entry, $content))
                $value = $content[2];

            $list[$property[2]] = $value;
        }

        return $list;
    }
}