#!/usr/bin/perl
##############################################################################
# Perl Script to produce a listing of the free mp3s available on emusic.com.
#
# Written by David Binard, 25-Mar-00
# mailto:binard@california.com
#
# History:
# 25-Mar-00     Original
# 11-May-00     Modified parsing regexps to support emusic.com syntax changes
# 07-Sep-00     Ditto
# 06-Jun-01     Ditto
# 25-Sep-01     Ditto. Also added hyperlink to genre titles.
#
# Note: I'm using lynx to download the HTML source from the Web rather than
# the Perl HTTP module because it's easier when you're behind a firewall.
##############################################################################

# Turn on autoflush for the currently selected output handle (STDOUT at this
# point), so progress messages appear as soon as they are printed.
$| = 1;

# Program used to download page source (see the note in the file header).
$lynx = 'lynx';

# Report file name and the time stamp printed in its header.
$outfile   = 'free_emusic.html';
$localtime = localtime();

# Site locations: the catalogue pages and the host serving the mp3 files.
$base_url         = 'http://www.emusic.com';
$free_content_url = 'http://content.emusic.com';

# Text that marks the "next page" link on a genre listing.
# Previous value, kept for reference: next_big.gif
$next_img = '<b>NEXT &raquo';

# Genre id (as used in the /genres/<id> URLs) => genre name.
%genres = (
    1  => 'Rock/Pop',
    3  => 'Classical',
    5  => 'Electronic',
    6  => 'Urban/Hip Hop',
    8  => 'Alternative/Punk',
    10 => 'Country/Folk',
    11 => 'New Age',
    12 => 'World/Reggae',
    13 => 'Soundtracks/Other',
    14 => 'Inspirational',
    15 => 'Jazz',
    16 => 'Blues',
);

# Fetch one album page and append a table row to OUT for every free track
# found on it.
#
# Takes no arguments; like the rest of this script it communicates through
# package globals:
#   reads   $artist, $artist_url, $album, $album_url (set by get_albums)
#           $lynx, $free_content_url                  (configuration)
#   writes  one <TR> block per free track to the OUT filehandle, and
#           progress messages to the currently selected handle
# Dies if the lynx command cannot be started; warns if it exits abnormally.
sub get_free_tracks {
   print "\tChecking $artist, $album\n";

   # $album_url comes from scraped HTML, so lynx is started without a shell
   # (list-form pipe open): characters such as & ; ? in the URL are passed
   # through untouched instead of being interpreted. $lynx is split on
   # whitespace so that it may still carry extra command-line options.
   my @lynx_cmd = (split(' ', $lynx), '-source', $album_url);
   open(my $page, '-|', @lynx_cmd) or die("Can't open input: $!");

   # Most recent plain-text line of THIS page; it is used as the title of the
   # next free-track link. Lexical, so a title can no longer leak in from a
   # previously processed album.
   my $title_line = '';

   # A named loop variable keeps the caller's $_ (which get_albums has
   # aliased to its current album entry) intact.
   while (my $line = <$page>) {
      chomp($line);

      if ($line =~ /\Q$free_content_url\E/) {
         # Only a .../free/<file> link is a free track. Any other reference
         # to the content host is skipped rather than turned into a bogus row.
         next unless $line =~ m{^.*\Q$free_content_url\E/.*/free/([^"]*)}i;
         my $track_file = $1;

         my $track_url = "$free_content_url/free/$track_file";
         my $track     = $title_line;
         foreach my $text ($track_url, $track, $track_file) {
            $text =~ s/^\s+//;
            $text =~ s/\s+$//;
         }
         # No title seen yet on this page: fall back to the file name so the
         # link still has visible text.
         $track = $track_file if $track eq '';

         print "\t\tFound $artist, $album: $track\n";
         print OUT <<EOM;
<TR>
   <TD><A HREF="$artist_url" TARGET="_blank">$artist</A>
   <TD><A HREF="$album_url" TARGET="_blank">$album</A>
   <TD><A HREF="$track_url">$track</A><BR>
</TR>
EOM
      } elsif ($line !~ /[><]/ && $line !~ /^\s*$/ && $line !~ /\d:\d\d/) {
         # Plain text that is neither markup, blank, nor a m:ss duration.
         $title_line = $line;
      }
   }

   # Closing a pipe handle also reaps the child; report a failed download
   # instead of silently treating it as "no free tracks".
   close($page)
      or warn "'$lynx -source $album_url' did not finish cleanly (status $?)\n";
}

# Build the HREF used when an artist has no page of its own: a javascript
# alert naming the artist. Quote characters in the scraped name are escaped
# so they cannot terminate the JavaScript string or the HTML attribute.
sub _no_artist_url_link {
   my ($name) = @_;
   $name =~ s/([\\'])/\\$1/g;
   $name =~ s/"/&quot;/g;
   return "javascript:alert('Sorry, there is no URL available for $name on emusic.com')";
}

# Write the report section for one genre: walk every page of the genre's
# album listing and call get_free_tracks() for each album found.
#
# Takes no arguments; communicates through package globals:
#   reads    $genre, $base_url, $next_img, $lynx
#   consumes $url  - first listing page on entry, empty string on return
#   keeps    $artist_line, $old_artist, $old_artist_url between pages and
#            calls, so an album listed without its own artist link is
#            attributed to the most recently seen artist
#   sets     $artist, $artist_url, $album, $album_url before each
#            get_free_tracks() call
#   writes   the genre heading and table frame to OUT, progress messages to
#            the currently selected handle
# Dies if the lynx command cannot be started; warns if it exits abnormally.
sub get_albums {
   print "\nProcessing $genre\n\n";
   print OUT <<EOM;
<H2>
<A HREF="$url" TARGET="_blank">$genre</A>
</H2>
<H3>
<TABLE WIDTH="100%" BORDER>
<TR>
   <TH WIDTH="33%">Artist
   <TH WIDTH="33%">Album
   <TH WIDTH="33%">Track
</TR>
EOM

   my %fetched;   # listing pages already downloaded during this call
   while ($url ne '') {
      # A "next" link pointing back at an earlier page would loop forever.
      if ($fetched{$url}++) {
         warn "Page $url was already processed; stopping to avoid an endless loop\n";
         $url = '';
         last;
      }

      print "Getting album list from $url\n";
      # After the first page $url is scraped from HTML, so lynx is started
      # without a shell (list-form pipe open). $lynx is split on whitespace
      # so that it may still carry extra command-line options.
      my $page_url = $url;
      my @lynx_cmd = (split(' ', $lynx), '-source', $page_url);
      open(my $page, '-|', @lynx_cmd) or die("Can't open input: $!");

      $url = '';      # stays empty unless this page links to a next one
      my @albums;     # "<artist line> <album line>" for each album on this page
      while (my $line = <$page>) {
         chomp($line);

         # Follow the "next" marker only when its line really carries a
         # genre link; otherwise the raw line would become the next URL.
         if ($line =~ /\Q$next_img\E/
             && $line =~ m{^.*HREF="/.*;/(genres/[\d/]*)}i) {
            $url = "$base_url/$1";
         }

         if ($line =~ m{<a href="/.*;/artists/}i) {
            $artist_line = $line;
         } elsif ($line =~ m{<a href="/.*;/albums/}i) {
            push(@albums, "$artist_line $line");
         }
      }
      # Closing a pipe handle also reaps the child; report a failed download
      # instead of silently treating it as an empty page.
      close($page)
         or warn "'$lynx -source $page_url' did not finish cleanly (status $?)\n";

      foreach my $entry (@albums) {
         # Artist name: the text right after the artist link's opening tag,
         # or failing that the leading text of the entry, up to the next tag.
         $artist = ($entry =~ m{^.*<a href="/.*;/artists/[^"]*">(.*)$}i) ? $1 : $entry;
         $artist =~ s/<.*$//s;
         $artist =~ s/^\s+//;
         $artist =~ s/\s+$//;

         if ($artist eq '') {
            if ($entry =~ m{^.*>([^><]+)</FONT.*$}i) {
               # Unlinked artist name inside a FONT element.
               $artist     = $1;
               $artist_url = _no_artist_url_link($artist);
            } else {
               # No artist information at all: same artist as the previous album.
               $artist     = $old_artist;
               $artist_url = $old_artist_url;
            }
         } elsif ($entry =~ m{^.*<a href="/.*;/(artists/[^"]*)}i) {
            $artist_url = "$base_url/$1";
         } else {
            # A name without a link must not turn the whole entry into a URL.
            $artist_url = _no_artist_url_link($artist);
         }

         # Every entry was queued because its album line contains an album
         # link, so this match always succeeds.
         my ($album_path) = $entry =~ m{^.*<a href="/.*;/(albums/[^"]*)}i;
         $album_url = "$base_url/$album_path";

         $album = ($entry =~ m{^.*<a href="/.*;/albums/[^"]*">(.*)$}i) ? $1 : $entry;
         $album =~ s/<.*$//s;
         $album =~ s/^\s+//;
         $album =~ s/\s+$//;

         $old_artist     = $artist;
         $old_artist_url = $artist_url;
         get_free_tracks();
      }
   }
   print OUT <<EOM;
</TABLE>
</H3>
EOM
}

# main
#
# Writes the report header to $outfile, lets get_albums() append one section
# per genre, then closes the document. Dies if the report cannot be opened or
# completely written.
open(OUT, '>', $outfile) || die("Can't open $outfile for output: $!\n");
# Turn on autoflush for OUT (so the report grows as tracks are found), then
# re-select the previously selected handle.
select((select(OUT), $| = 1)[0]);
print OUT <<EOM;
<HTML>
<HEAD>
<TITLE> Free mp3s from emusic.com </TITLE>
</HEAD>
<BODY>
<CENTER>
<H1>Free mp3s from <A HREF="http://www.emusic.com" TARGET="_blank">emusic.com</A></H1>
<H2>
by <A HREF="http://www.california.com/~binard/perl/free_emusic.pl" TARGET="_blank">free_emusic.pl</A><BR>
</H2>
<H3>
$localtime
</H3>
</CENTER>
Please note that left-clicking on the link to the mp3 may not work, as emusic.com seems to now check the referrer address, and redirect the request to another page if it didn't come from their site.
<BR>
An easy workaround is to <B>right-click and "Save As"</B>, or save this HTML file to your local file system and load it from there (in which case no referrer is involved). You can also click on the link to the album, and download the free track from there.
<CENTER>
<HR>
EOM

# Visit the genres in numeric id order so that every run produces the
# sections in the same sequence (plain "keys" order is unpredictable).
foreach my $key (sort { $a <=> $b } keys %genres) {
   $genre = $genres{$key};
   $url = "$base_url/genres/$key";
   get_albums();
}

# Close the CENTER opened before the genre sections, then the document.
print OUT <<EOM;
</CENTER>
</BODY>
</HTML>
EOM
# Buffered write errors (e.g. a full disk) only surface when closing.
close(OUT) || die("Can't close $outfile: $!\n");
