Source code file content
subversion / 2c.lt / stats_functions.php
Size: 1478 bytes, 1 line
<?php
/**
 * Return the first sentence of the first paragraph found in an HTML document.
 *
 * HTML entities are decoded first, so entity-encoded markup is treated like
 * real tags. The text is then scanned for "<tag>...</tag>" fragments; because
 * "." does not match newlines, each fragment lies on a single line. The first
 * fragment that contains a <p> tag and whose tag-stripped text has a sentence
 * ending in a period supplies the excerpt.
 *
 * @param string $content Raw HTML of the page.
 * @return string|false The first sentence (up to and including its period),
 *                      or false when no paragraph contains one.
 */
function getExcerpt($content)
{
    $text = html_entity_decode($content);

    // Collect every "<tag>...</tag>" fragment (full matches are in $fragments[0]).
    preg_match_all('|<[^>]+>(.*)</[^>]+>|', $text, $fragments, PREG_PATTERN_ORDER);

    foreach ($fragments[0] as $fragment) {
        // Require a real opening <p> tag. The previous pattern '<p>i' was parsed
        // with "<" and ">" as delimiters, so it matched any fragment that merely
        // contained the letter "p".
        if (!preg_match('/<p[\s>]/i', $fragment)) {
            continue;
        }

        $plain = strip_tags($fragment);

        // First run of non-period characters followed by a literal period.
        // Fragments without such a sentence are skipped instead of producing
        // an undefined-index read.
        if (preg_match('/[^.]+\./', $plain, $sentence)) {
            return $sentence[0];
        }
    }

    return false;
}
/**
 * Pull the text of the <title> element out of an HTML document.
 *
 * Matching is case-insensitive and tolerates whitespace inside the tag
 * brackets. The title text itself may not contain "<".
 *
 * @param string $content Raw HTML of the page.
 * @return string|false The title text as found, or false if there is no match.
 */
function getMetaTitle($content)
{
    $found = array();
    $hit = preg_match(
        "|<[\s]*title[\s]*>([^<]+)<[\s]*/[\s]*title[\s]*>|Ui",
        $content,
        $found
    );

    return $hit ? $found[1] : false;
}
/**
 * Extract the content attribute of the page's description <meta> tag.
 *
 * Two patterns are tried in order: a double-quoted content value, then a
 * single-quoted one. Both expect the word "description" to appear before
 * "content=" within the matched text.
 *
 * @param string $content Raw HTML of the page.
 * @return string|false The attribute value, or false when neither pattern matches.
 */
function getMetaDescription($content)
{
    $metaDescriptionPatterns = array(
        "/<meta.+description.+content[\s]*=[\s]*\"([^\"]+)\"[^>]*>/Ui",
        "/<meta.+description.+content[\s]*=[\s]*'([^']+)'[^>]*>/Ui",
    );

    foreach ($metaDescriptionPatterns as $pattern) {
        // Return on the first pattern that matches. Previously an unbraced
        // "if" left the "break" unconditional, so the loop always stopped
        // after the first pattern and single-quoted attributes were never tried.
        if (preg_match($pattern, $content, $match)) {
            return $match[1];
        }
    }

    return false;
}
// Demo driver: fetch one page and print the metadata extracted from it.
$url = 'http://2c.lt/'; //url to scrape
$content = file_get_contents($url);
if ($content === false) {
    // The fetch failed (PHP has already raised a warning). Continue with an
    // empty document so each field below simply prints as empty.
    $content = '';
}

$title = getMetaTitle($content);
$description = getMetaDescription($content);
$excerpt = getExcerpt($content);

// The values come from a remote page, so escape them before echoing into HTML.
// double_encode is off because the scraped text may already contain entities;
// ENT_SUBSTITUTE keeps text with invalid UTF-8 bytes from collapsing to "".
// A false result (nothing found) is cast to an empty string, as before.
$escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;
print "title: " . htmlspecialchars((string) $title, $escapeFlags, 'UTF-8', false) . " ";
print "<br />";
print "description: " . htmlspecialchars((string) $description, $escapeFlags, 'UTF-8', false) . " ";
print "<br />";
print "excerpt: " . htmlspecialchars((string) $excerpt, $escapeFlags, 'UTF-8', false);
?>





