MusicMoz Artists, Albums and Tracks

This submission showcases ECL's support for XML documents, using the MusicMoz data (http://musicmoz.org/xml). The RECORD definitions let you drill down to the information that you need, and the string library lets you easily clean out redundant or obsolete data as needed. The result is a powerful and quick way to search for music artists and their albums and tracks in HPCC.

IMPORT STD;  //Import string library used to clean data here


//NOTE: in the original XML download, line 35395 contains corrupt text
//that will halt this process. Before proceeding, load the XML file into
//your favorite editor and remove that corrupt line (line 35395).

//XML Definitions
// Layout of a single track row. Every xpath below is evaluated relative to
// the XML element that produced the row.
TrackRecord := RECORD
   // "number" attribute of a child <disc> element
   STRING disc{XPATH('disc/@number')};
   // "number" attribute of the row's own element
   STRING number{XPATH('@number')};
   // "title" attribute of the row's own element
   STRING tracktitle{XPATH('@title')};
END;

// Layout of one <item> row: two attributes of the element itself, a set of
// child-element text values, a nested child dataset of tracks, and the
// cover image source.
ItemRecord := RECORD
   // Attributes of the item element
   STRING id{XPATH('@id')};
   STRING rtype{XPATH('@type')};

   // Text of child elements
   STRING title{XPATH('title')};
   STRING genre{XPATH('genre')};
   STRING releaseDate{XPATH('releaseDate')};
   STRING formats{XPATH('formats')};
   STRING label{XPATH('label')};
   STRING CatalogNumber{XPATH('cataloguenumber')};
   STRING producers{XPATH('producers')};
   STRING guestmusicians{XPATH('guestmusicians')};
   STRING description{XPATH('description')};

   // Nested track rows. The 653 limit is sized for the largest known item:
   // there is an Elvis Box Set with 30 discs and 653 tracks!
   DATASET(TrackRecord) Tracks{XPATH('tracklisting/track'), MAXCOUNT(653)};

   // "src" attribute of the child <cover> element
   STRING coversrc{XPATH('cover/@src')};
END;

// Layout of one top-level row: the "name" attribute of the row's element
// plus up to 50 nested <item> child rows.
ArtistRecord := RECORD
   STRING name{XPATH('@name')};
   DATASET(ItemRecord) Items{XPATH('item'), MAXCOUNT(50)};
END;

//DATASET allows us to clean and process XML further
// Raw input: every 'musicmoz/category' element of the logical file becomes
// one ArtistRecord row.
d := DATASET(
   '~music::in::bandsartists',
   ArtistRecord,
   XML('musicmoz/category')
);


// Keep only rows that have at least one item and whose name contains
// '/Discography' but none of '/Reviews', '/Links' or '/BootlegReleases'.
// STD.Str.Find returns the position of the match, or 0 when there is none.
Cleand := d(
   EXISTS(Items)
   AND STD.Str.Find(name, '/Discography') > 0
   AND STD.Str.Find(name, '/Reviews') = 0
   AND STD.Str.Find(name, '/Links') = 0
   AND STD.Str.Find(name, '/BootlegReleases') = 0
);

// CleanArtist: replaces the raw path held in Name with a single path
// segment and copies every other field through unchanged.
//   L       - the input row
//   Returns - an ArtistRecord whose Name is the text of L.Name starting at
//             character 21 and ending just before the next '/'. If no '/'
//             follows, the whole remainder is kept.
// The result type is the RECORD structure (ArtistRecord) rather than the
// filtered dataset; the layout is the same.
ArtistRecord CleanArtist(ArtistRecord L) := TRANSFORM
   // Drop the first 20 characters of the name.
   // NOTE(review): presumably a fixed-width category prefix - confirm
   // against the input data.
   TempString := L.Name[21..];
   InstanceSlash := STD.Str.Find(TempString, '/'); //string library
   // Find returns 0 when no '/' is present; fall back to the full remainder
   // instead of computing an end position of -1.
   EndPos := IF(InstanceSlash > 0, InstanceSlash - 1, LENGTH(TempString));
   SELF.Name := TempString[1 .. EndPos];
   SELF := L;
END;

// Run every filtered row through CleanArtist to get the cleaned names.
Clean_BandsArtists := PROJECT(CleanD, CleanArtist(LEFT));

// Sample searches: rows whose cleaned name starts with the given prefix,
// each written to its own named result.
OUTPUT(Clean_BandsArtists(name[1..5] = 'Simon'),
       NAMED('Simon_Search'));
OUTPUT(Clean_BandsArtists(name[1..5] = 'Sting'),
       NAMED('Sting_Search'));
OUTPUT(Clean_BandsArtists(name[1..7] = 'Beatles'),
       NAMED('Beatles'));