#!/usr/local/bin/perl

use LWP::Simple;
use HTML::LinkExtor;
use lib ".";
use lib "ext";

# Usage: <script> <start-url> <word>
#
# Crawls from <start-url> and reports which of the visited URLs contain
# <word> (a literal substring match) and which do not.

# Crawl results and settings.  These are package variables because
# go_for_it reads and updates them directly.
our @links_to_pages_with_word = ();
our @links_to_pages_without_word = ();
our @all_found_links = ();     # not referenced by the code visible in this file
our $MAX_SEARCH_LEVELS = 2;    # go_for_it stops once its call count exceeds this
our $no_of_calls = 0;          # incremented on every go_for_it call

# Both arguments are required.  Without a URL there is nothing to fetch,
# and an empty word would be "found" at offset 0 of every page.
die "Usage: $0 <start-url> <word>\n"
   unless @ARGV >= 2 && length($ARGV[0]) && length($ARGV[1]);

our $start_url = $ARGV[0];
our $word = $ARGV[1];
go_for_it($start_url);

# print header('text/plain');
print "\n";
my $hit_count = @links_to_pages_with_word;
print "The word: $word does exist on the following: $hit_count pages:\n";
foreach my $page (@links_to_pages_with_word) {
   print "  $page\n";
}
print "\n";
my $miss_count = @links_to_pages_without_word;
print "The word: $word does not exist on the following: $miss_count pages:\n";
foreach my $page (@links_to_pages_without_word) {
   print "  $page\n";
}

# Crawl one level of URLs, then recurse into the links found on them.
#
# Every URL that has not been handled before is downloaded with
# LWP::Simple's get and searched for the literal string in $word.  The URL
# is then appended to @links_to_pages_with_word or
# @links_to_pages_without_word.  The links collected from all pages on this
# level become the URL list of the next call.  Each call increments
# $no_of_calls; the call returns immediately once that counter exceeds
# $MAX_SEARCH_LEVELS.
#
# Parameters:
#   @_ - the URLs that make up the current level.
#
# Returns nothing; results are left in the package variables named above
# and progress is printed to STDOUT.
sub go_for_it {
   $no_of_calls++;
   return if $no_of_calls > $MAX_SEARCH_LEVELS;

   # Persistent across calls: a URL linked from several pages (or from its
   # own level again) is fetched and reported only once.
   our %already_fetched;
   my @urls = grep { !$already_fetched{$_}++ } @_;

   my $no_of_links_on_this_level = @urls;
   my @all_links_on_next_level;
   my $current_link_no = 0;

   foreach my $url (@urls) {
      $current_link_no++;
      print "Level: $no_of_calls, link: $current_link_no($no_of_links_on_this_level)\n";

      # get returns undef when the download fails.  Such a URL is neither a
      # page with the word nor a page without it, so it is only logged.
      my $html = get($url);
      if (!defined $html) {
         print "  E: $url (fetch failed)\n";
         next;
      }

      # Does the word exist on this page?
      if (index($html, $word) != -1) {
         print "  Y: $url\n";
         push @links_to_pages_with_word, $url;
      } else {
         print "  N: $url\n";
         push @links_to_pages_without_word, $url;
      }

      # Check all links
      my @all_links_on_this_page = get_links($url, $html);
      push @all_links_on_next_level, @all_links_on_this_page;
      my $size_1 = @all_links_on_this_page;
      my $size_2 = @all_links_on_next_level;
      print "  Links on this page: $size_1, links on next level: $size_2\n";
   }

   go_for_it(@all_links_on_next_level);
   return;
}

# Extract the distinct link targets from an HTML document.
#
# Parameters:
#   $url  - address the document came from; passed to HTML::LinkExtor as
#           the base URL for the links it extracts.
#   $html - the document source.  May be undef or empty (for example after
#           a failed download), in which case no links are returned.
#
# Returns the values of every link attribute the parser reports, whatever
# the tag, with duplicates removed and sorted as strings.
sub get_links {
   my ($url, $html) = @_;
   my %seen;

   # Nothing to parse without content; skip the parser entirely.
   if (defined $html && length $html) {
      my $parser = HTML::LinkExtor->new(undef, $url);
      $parser->parse($html);
      $parser->eof;

      # Each entry is [ tag, attr_name => attr_value, ... ].
      foreach my $link ($parser->links) {
         my ($tag, @attr_pairs) = @$link;
         while (my ($attr_name, $attr_value) = splice(@attr_pairs, 0, 2)) {
            $seen{$attr_value}++;
         }
      }
   }

   my @sorted_links = sort keys %seen;
   return @sorted_links;
}