Data Transformation Basics
An ECL programmer on our team was frustrated because he did not know of a reference file that returned a city name from a zip code entered. While studying our local repository I found a built-in function that returned a comma delimited list of cities for every 5-digit zip code. My project was to take this format and transform it into the single zipcode, city record that my friend was looking for.
For example-
Change this input:
0,'61111','IL','3,LOVES PARK,MACHESNEY PARK,MACHESNEY PK'
To this output:
13,61111,IL,LOVES PARK
14,61111,IL,MACHESNEY PARK
15,61111,IL,MACHESNEY PK
Here is the complete code:
//*****************************************
//Spec: Create a file of zipcodes and related cities
IMPORT Std.Str; //Import String library used in transform
//record definition of raw input data
//since Rtn field is variable length, we can also use this same record
//definition in our output file
Rec := RECORD
UNSIGNED3 CSZID;
STRING5 Zip;
STRING2 State;
STRING Rtn{MAXLENGTH(1024)};
END;
//this data set simulates the data returned from the internal function(s)
FuncDS := DATASET([{0,'00000','',0},
{0,'14513','NY','2,NEWARK,EAST PALMYRA'},
{0,'29710','SC','3,CLOVER,LAKE WYLIE,RIVER HILLS'},
{0,'33334','FL','4,FORT LAUDERDALE,FT LAUDERDALE,OAKLAND PARK,WILTON MANORS'},
{0,'33424','FL','1,BOYNTON BEACH'},
{0,'55555','MN','1,YOUNG AMERICA'},
{0,'60933','IL','1,ELLIOTT'},
{0,'61111','IL','3,LOVES PARK,MACHESNEY PARK,MACHESNEY PK'},
{0,'66604','KS','1,TOPEKA'},
{0,'68836','NE','2,ELBA,COTESFIELD'},
{0,'74652','OK','2,SHIDLER,FORAKER'},
{0,'81252','CO','2,WESTCLIFFE,SILVER CLIFF'},
{0,'99999','',0}],Rec);
//Moves data from FuncDS DATASET to ZipsIn recordset
Rec Xf1(Rec L,INTEGER C) := TRANSFORM
SELF.CSZID := 0;
SELF.Zip := IF(INTFORMAT(c,5,1)= L.ZIP,
L.ZIP,
'');
SELF := L;
END;
//removes blank records - cleans input data
//this function was originally used because we had an internal library
//that returned zip code information for all zip codes. That is why we
//used 100000 as the number of iterations. The use of INTFORMAT changed
//counter values to the zero padded zip format
ZipsIN := NORMALIZE(FuncDS,100000,XF1(LEFT,COUNTER))(Rtn <> '0',
Zip <> '');
//Here is the original code, and why we used NORMALIZE:
/* Rec Xf1(Rec L,INTEGER C) := TRANSFORM
SELF.CSZID := 0;
SELF.Zip := INTFORMAT(c,5,1);
SELF.State := ZipLib.ZipToState2(SELF.Zip);
SELF.Rtn := ZipLib.ZipToCities(SELF.Zip);
END;
ZipsIN := NORMALIZE(BlankDS,100000,XF1(LEFT,COUNTER))(Rtn <> '0');
*/
//This was the input data that we needed to process
OUTPUT(ZipsIN,NAMED('ZipsIn'));
// This TRANSFORM has three local ECL definitions (InstanceComma, StartPos
// and EndPos)
// used to find the commas and parse the city information between them.
Rec XF2(Rec L,INTEGER C) := TRANSFORM
InstanceComma := Str.Find(L.Rtn,',',C+1);
EndPos := IF(InstanceComma=0,LENGTH(TRIM(L.Rtn)),InstanceComma-1);
StartPos := Str.Find(L.Rtn,',',C) + 1;
SELF.Zip := L.Zip;
SELF.Rtn := L.Rtn[StartPos .. EndPos];
SELF := L;
END;
//DISTRIBUTE allows for more efficient processing
//String index of 1 for RTN determines how many times the transform
//gets called
ZipsOut := NORMALIZE(DISTRIBUTE(ZipsIn,HASH32(Zip)),(INTEGER)LEFT.Rtn[1],XF2(LEFT,COUNTER));
Rec XF3(Rec L,INTEGER C) := TRANSFORM
SELF.CSZID := C;
SELF := L;
END;
//Finally, after we NORMALIZE each record we assign a unique ID to each
//one using a simple PROJECT
UIDzips := PROJECT(ZipsOut,XF3(LEFT,COUNTER));
output(UIDZips,,'~TEST::OUT::CityStateZip',OVERWRITE);










