NAME
YAPE::HTML - Yet Another Parser/Extractor for HTML
SYNOPSIS
use strict;
use warnings;

use YAPE::HTML;

# The HTML text to parse ("..." is a placeholder).
my $content = "...";
my $parser  = YAPE::HTML->new($content);
my ($extor, @fonts, @urls, @headings, @comments);

# here is the tokenizing part
# Pull chunks from the parser one at a time; for every <font> tag that
# has a true FACE attribute value, remember that value in @fonts.
while (my $chunk = $parser->next) {
    if ($chunk->type eq 'tag' and $chunk->tag eq 'font') {
        if (my $face = $chunk->get_attr('face')) {
            push @fonts, $face;
        }
    }
}

# here we catch any errors
# If the parser does not report itself as done once the loop above has
# stopped, treat the input as bad HTML and die, including the parser's
# error and chunk values in the message.
unless ($parser->done) {
    die sprintf "bad HTML: %s (%s)",
        $parser->error, $parser->chunk;
}

# here is the extracting part

# <a> tags with HREF attributes
# <img> tags with SRC attributes
$extor = $parser->extract(a => ['href'], img => ['src']);

# The extractor is a code reference: each call yields the next matching
# chunk, and the loop stops at the first false value.
while (my $chunk = $extor->()) {
    # Only <a> and <img> tags were requested, so anything that is not an
    # <a> tag is read through its SRC attribute.
    push @urls, $chunk->get_attr(
        $chunk->tag eq 'a' ? 'href' : 'src'
    );
}
#