IMPORT STD; // Standard library; STD.Str.Find is used below to inspect category names

// NOTE: in the original XML download there is corrupt text on line 35395 that
// will halt this process. Before running this job, open the XML file in an
// editor and remove that line. (The offending text is not reproduced in this
// file; locate it by its line number.)

//---------------------------------------------------------------------------
// XML layouts
//---------------------------------------------------------------------------

// One <track> element of an item's track listing.
TrackRecord := RECORD
  STRING disc{XPATH('disc/@number')};
  STRING number{XPATH('@number')};
  STRING tracktitle{XPATH('@title')};
END;

// One <item> element nested inside a category.
ItemRecord := RECORD
  STRING id{XPATH('@id')};
  STRING rtype{XPATH('@type')};
  STRING title{XPATH('title')};
  STRING genre{XPATH('genre')};
  STRING releaseDate{XPATH('releaseDate')};
  STRING formats{XPATH('formats')};
  STRING label{XPATH('label')};
  STRING CatalogNumber{XPATH('cataloguenumber')};
  STRING producers{XPATH('producers')};
  STRING guestmusicians{XPATH('guestmusicians')};
  STRING description{XPATH('description')};
  // There is an Elvis box set with 30 discs and 653 tracks, hence the limit.
  DATASET(TrackRecord) Tracks{XPATH('tracklisting/track'), MAXCOUNT(653)};
  STRING coversrc{XPATH('cover/@src')};
END;

// One category row: its name attribute plus up to 50 child items.
ArtistRecord := RECORD
  STRING name{XPATH('@name')};
  DATASET(ItemRecord) Items{XPATH('item'), MAXCOUNT(50)};
END;

//---------------------------------------------------------------------------
// Read and filter
//---------------------------------------------------------------------------

// Each 'musicmoz/category' element of the logical file becomes one row.
d := DATASET('~music::in::bandsartists', ArtistRecord, XML('musicmoz/category'));

// Keep only categories that have items and whose name contains
// '/Discography' but none of '/Reviews', '/Links' or '/BootlegReleases'.
CleanD := d(EXISTS(Items)
            AND STD.Str.Find(name, '/Discography') >= 1
            AND STD.Str.Find(name, '/Reviews') = 0
            AND STD.Str.Find(name, '/Links') = 0
            AND STD.Str.Find(name, '/BootlegReleases') = 0);

//---------------------------------------------------------------------------
// Reduce the category name to a single path segment
//---------------------------------------------------------------------------

// Replaces Name with the text that starts at character 21 and ends just
// before the next '/'; every other field is copied through unchanged.
// NOTE(review): the fixed offset of 21 presumably skips a constant-length
// leading path in the category name - confirm against the input data.
ArtistRecord CleanArtist(ArtistRecord L) := TRANSFORM
  Remainder  := L.Name[21..];
  FirstSlash := STD.Str.Find(Remainder, '/');
  SELF.Name  := Remainder[1 .. FirstSlash - 1];
  SELF       := L;
END;

Clean_BandsArtists := PROJECT(CleanD, CleanArtist(LEFT));

//---------------------------------------------------------------------------
// Sample look-ups by leading characters of the cleaned name
//---------------------------------------------------------------------------
OUTPUT(Clean_BandsArtists(name[1 .. 5] = 'Simon'), NAMED('Simon_Search'));
OUTPUT(Clean_BandsArtists(name[1 .. 5] = 'Sting'), NAMED('Sting_Search'));
OUTPUT(Clean_BandsArtists(name[1 .. 7] = 'Beatles'), NAMED('Beatles'));