db = new ParlDB;
$temp1 = $source;
# Set up various variables
$capsword = "[A-Z][a-zA-Z'0-9]*"; # not starting with number, as catches too much
$fillerwords = "of|and|in|on|under|the";
$middlewordre = "(?:$capsword|$fillerwords)\s*";
$endwordre = "(?:$capsword)\s*"; # and, of etc. can't appear at ends
# Match either "Two Endwords" or "Endword and Some Middle Words"
$greedyproperre = "/\b($endwordre(?:$middlewordre)*$endwordre)\b/ms";
# Match without filler words (so if you have a phrase like
# "Amnesty International and Human Rights Watch" you also get both parts
# separately "Amnesty International" and "Human Rights Watch")
$frugalproperre = "/\b((?:$endwordre){2,})\b/ms";
preg_match_all($greedyproperre, $temp1, $propernounphrases1);
preg_match_all($frugalproperre, $temp1, $propernounphrases2);
# Three Letter Acronyms
preg_match_all("/\b([A-Z]{2,})/ms", $temp1, $acronyms);
# We don't want no steenking duplicates
$phrases = array_unique(array_merge($propernounphrases1[0], $propernounphrases2[0], $acronyms[0]));
# Sort into order, largest first
usort($phrases, "lensort");
# Open up a db connection, and whittle our list down even further, against
# the real titles.
$matched = array();
global $DATABASE;
global $HOST;
global $USER;
global $PASSWORD;
if (!($connection = @ mysql_connect ($HOST, $USER, $PASSWORD)))
die("could not connect");
if (!(@ mysql_select_db ("wp", $connection))) die("no database found");
foreach ($phrases as $phrase) {
$phrase = trim($phrase);
$wikistring = ereg_replace(" ","_", $phrase);
if (!($match = mysql_query ("SELECT * FROM titles WHERE title = '". mysql_escape_string($wikistring). "'", $connection)))
die("something went wrong");
while ($row = @ mysql_fetch_array($match, MYSQL_NUM)){
// $q = $this->db->query("SELECT title FROM titles WHERE title = '" . mysql_escape_string($wikistring). "';");
// if ($q->rows > 0) {
$found = $row[0];
if ($found == $wikistring) {
# See if already matched a string this one is contained within
$use = true;
foreach ($matched as $got) {
if (strstr($got, trim($phrase))) {
$use = false;
}
}
# Go ahead only if haven't...
if ($use) {
# 1 means only replace one match for phrase per paragraph
$temp1 = preg_replace ("/{$phrase}/", "
{$phrase}", $temp1, 1);
array_push($matched, $phrase);
}
}
}
}
# clean up links with img tags
// $temp1 = antiTagInTag ($temp1);
return $temp1;
}
/**
 * Fetch (or reuse a cached copy of) the Technorati "cosmos" XML for a URL and
 * feed it through an expat parser wired to start1Element / end1Element /
 * character1Data.
 *
 * The response is cached as cache/<key>.xml, where <key> is the first run of
 * seven digits found in $urlstring. The cache is refreshed when the file is
 * missing, empty, or more than 1800 seconds old. If a refresh fails, any
 * existing cached copy is parsed instead; if there is none, nothing is parsed.
 *
 * @param string $urlstring URL to look up; interpolated into the API query
 *                          string exactly as given.
 * @return void Results are delivered through the parser handlers.
 */
function doCosmos ($urlstring){
    // NOTE(review): $urlstring is not URL-encoded here; presumably callers
    // pass an already-encoded value -- confirm before changing.
    // NOTE(review): the API key is hard-coded in this URL.
    $techRati = "http://api.technorati.com/cosmos?format=xml&url=$urlstring&key=7e64960cc7e9b1cb4315e56a6544fce7" ;

    // Cache key: the first seven-digit run in the URL. Without one, fall back
    // to a hash of the URL so unrelated URLs never share "cache/.xml".
    if (preg_match ("/\d{7}/", $urlstring, $nums)) {
        $cacheFilename = $nums[0];
    } else {
        $cacheFilename = md5($urlstring);
    }
    $cachePath = "cache/{$cacheFilename}.xml";

    $needsRefresh = !file_exists($cachePath)
        || (time() - filemtime($cachePath) > 1800)
        || filesize($cachePath) == 0;

    $xml = false;
    if ($needsRefresh) {
        $fetched = file_get_contents($techRati);
        // Reset the script's execution time limit to 10 seconds from here.
        set_time_limit (10);
        if ($fetched !== false && $fetched !== '') {
            $xml = $fetched;
            // Only overwrite the cache with a real response, so a failed
            // fetch cannot clobber a usable (if stale) copy.
            $cachefp = fopen($cachePath, "w");
            if ($cachefp) {
                fwrite($cachefp, $fetched);
                fclose($cachefp);
            }
        }
    }

    if ($xml === false) {
        if (!file_exists($cachePath)) {
            // Fetch failed and nothing is cached: there is nothing to parse.
            return;
        }
        $xml = file_get_contents($cachePath);
        if ($xml === false) {
            die("Error reading XML data.");
        }
    }

    $xml_parser = xml_parser_create();
    xml_set_element_handler($xml_parser, "start1Element", "end1Element");
    xml_set_character_data_handler($xml_parser, "character1Data");
    // Parse the document in one call so text nodes are not split at
    // arbitrary read-buffer boundaries (character1Data assigns, it does not
    // append).
    xml_parse($xml_parser, $xml, true);
    xml_parser_free($xml_parser);
}
/**
 * Expat start-element handler: appends the opened tag to the global
 * caret-delimited path held in $curTag (e.g. "^TAPI^DOCUMENT^ITEM").
 *
 * @param mixed  $parser  Parser handle supplied by expat (unused).
 * @param string $tagName Name of the element being opened.
 * @param array  $attrs   Element attributes (unused).
 * @return void
 */
function start1Element($parser, $tagName, $attrs) {
    global $curTag;
    $curTag .= "^$tagName";
}
/**
 * Expat end-element handler: removes the last "^TAG" segment from the global
 * caret-delimited path held in $curTag.
 *
 * @param mixed  $parser  Parser handle supplied by expat (unused).
 * @param string $tagName Name of the element being closed (unused; the last
 *                        path segment is dropped regardless of its name).
 * @return void
 */
function end1Element($parser, $tagName) {
    global $curTag;
    $caret_pos = strrpos((string) $curTag, '^');
    // No caret means the path is already empty (or malformed): reset it
    // explicitly rather than relying on false being coerced to length 0.
    $curTag = ($caret_pos === false) ? '' : substr($curTag, 0, $caret_pos);
}
/**
 * Expat character-data handler: copies weblog name, weblog URL and nearest
 * permalink text into xItem objects stored in the global $arItems list.
 *
 * A weblog NAME starts a new item at index $itemCount; a NEARESTPERMALINK
 * fills in the item at that index and then advances $itemCount. Text under
 * any other element path is ignored.
 *
 * NOTE(review): expat may deliver one text node in several calls (for
 * example around entity references); each call here overwrites rather than
 * appends, and every permalink call advances the counter. Fixing that needs
 * coordination with the end-element handler.
 *
 * @param mixed  $parser Parser handle supplied by expat (unused).
 * @param string $data   Chunk of character data.
 * @return void
 */
function character1Data($parser, $data) {
    global $itemCount, $curTag, $arItems;

    $titleKey = "^TAPI^DOCUMENT^ITEM^WEBLOG^NAME";
    $permalinkKey = "^TAPI^DOCUMENT^ITEM^NEARESTPERMALINK";
    $linkKey = "^TAPI^DOCUMENT^ITEM^WEBLOG^URL";

    if ($curTag != $titleKey && $curTag != $permalinkKey && $curTag != $linkKey) {
        return;
    }

    // Make sure the shared state is usable even if no caller initialised it;
    // otherwise the first item would be stored under a null/"" key.
    if (!isset($itemCount)) {
        $itemCount = 0;
    }
    if (!isset($arItems)) {
        $arItems = array();
    }

    if ($curTag == $titleKey) {
        // A weblog name marks the start of a new item.
        $arItems[$itemCount] = new xItem();
        $arItems[$itemCount]->xTitle = $data;
        return;
    }

    // Permalink or URL seen before any name for this index: create the item
    // instead of assigning a property on a non-existent entry.
    if (!isset($arItems[$itemCount])) {
        $arItems[$itemCount] = new xItem();
    }

    if ($curTag == $permalinkKey) {
        $arItems[$itemCount]->xPermalink = $data;
        // Advance to the next item once the permalink has been stored.
        $itemCount++;
    } else {
        $arItems[$itemCount]->xLink = $data;
    }
}
#credit: isaac schlueter (lifted from http://uk2.php.net/strip-tags)
/**
 * Remove markup that appears inside other tags (e.g. a tag nested in an
 * attribute value), leaving top-level tags and text in place.
 *
 * Each "<...>" span is located with nesting-aware matching of angle
 * brackets, its inner text is passed through format_to_output() (or
 * strip_tags() when that function is not defined), and the cleaned tags are
 * substituted back into the content.
 *
 * @param string $content Markup to clean.
 * @param string $format  Passed through to format_to_output().
 * @return string Cleaned content. The input is returned unchanged if a "<"
 *                is found with no later ">" (malformed tag).
 */
function antiTagInTag( $content = '', $format = 'htmlhead' )
{
    if( !function_exists( 'format_to_output' ) )
    {   // Use the external function if it exists, or fall back on just strip_tags.
        function format_to_output($content, $format)
        {
            return strip_tags($content);
        }
    }

    $content = (string) $content;
    $length = strlen( $content );
    // Initialised up front so content without any tags does not hand
    // undefined variables to str_replace().
    $tags = array();
    $newtags = array();
    $tagend = -1;

    for( $tagstart = strpos( $content, '<', $tagend + 1 ) ; $tagstart !== false && $tagstart < $length; $tagstart = strpos( $content, '<', $tagend ) )
    {
        // Got the start of a tag. Now find the proper end, counting nested
        // opens so the matching close is the one that balances this "<".
        $walker = $tagstart + 1;
        $open = 1;
        while( $open != 0 && $walker < $length )
        {
            $nextopen = strpos( $content, '<', $walker );
            $nextclose = strpos( $content, '>', $walker );
            if( $nextclose === false )
            {   // Open bracket without a close bracket: give up and return
                // the content untouched.
                return $content;
            }
            if( $nextopen === false || $nextopen > $nextclose )
            {   // No more opens, or a close happens before the next open:
                // step past the close and drop one nesting level.
                $open --;
                $walker = $nextclose + 1;
            }
            elseif( $nextopen < $nextclose )
            {   // An open before the next close: one level deeper.
                $open ++;
                $walker = $nextopen + 1;
            }
        }

        $tagend = $walker;
        if( $tagend > $length )
            $tagend = $length;
        else
        {
            // Narrow the range to the text strictly between "<" and ">".
            $tagend --;
            $tagstart ++;
        }

        $tag = substr( $content, $tagstart, $tagend - $tagstart );
        $tags[] = '<' . $tag . '>';
        $newtags[] = '<' . format_to_output( $tag, $format ) . '>';
    }

    return str_replace($tags, $newtags, $content);
}
?>