php-src/ext/tidy/examples/urlgrab5.php
John Coggeshall 2f4ca9a2d4 Removed examples which don't actually work anymore for PHP 4, and
updated those that do for PHP 5.
2004-08-28 18:10:21 +00:00

40 lines
820 B
PHP

<?php
/*
* urlgrab5.php
*
* A simple command-line utility to extract all of the URLs contained
* within <A HREF> tags from a document.
*
* NOTE: Only works with tidy for PHP 5; for tidy on PHP 4.3.x, please see urlgrab.php.
*
* By: John Coggeshall <john@php.net>
*
* Usage: php urlgrab5.php <file>
*
*/
/**
 * Recursively collect the HREF value of every <A> element at or below $node.
 *
 * The tree is walked depth-first, so URLs appear in document order. Anchors
 * that carry no HREF attribute (for example named anchors such as
 * <a name="top">) are skipped, so the result never contains NULL placeholders.
 *
 * @param tidyNode   $node The node to start the walk from.
 * @param array|null $urls Accumulator, passed by reference. A non-array value
 *                         is replaced by an empty array; existing entries are
 *                         kept and newly found URLs are appended.
 *
 * @return array The accumulated list of HREF values (same contents as $urls).
 */
function dump_nodes(tidyNode $node, &$urls = NULL) {
    if (!is_array($urls)) {
        $urls = array();
    }

    // isset() guards nodes whose id is unset or NULL, and anchors whose
    // attribute list is NULL or simply has no 'href' entry.
    if (isset($node->id)
        && $node->id === TIDY_TAG_A
        && isset($node->attribute['href'])
    ) {
        $urls[] = $node->attribute['href'];
    }

    if ($node->hasChildren()) {
        foreach ($node->child as $child) {
            // $urls is shared by reference, so the recursive call appends
            // directly to the caller's accumulator.
            dump_nodes($child, $urls);
        }
    }

    return $urls;
}
// Command-line entry point: parse the file named by the first argument,
// let tidy clean/repair the markup, then print every URL found in it.
if (!isset($_SERVER['argv'][1])) {
    fwrite(STDERR, "Usage: php urlgrab5.php <file>\n");
    exit(1);
}
$file = $_SERVER['argv'][1];

// tidy_parse_file() returns false when the file cannot be read or parsed;
// calling a method on that result would be a fatal error.
$a = tidy_parse_file($file);
if ($a === false) {
    fwrite(STDERR, "urlgrab5.php: unable to parse '$file'\n");
    exit(1);
}

$a->cleanRepair();
print_r(dump_nodes($a->html()));
?>